18S analysis

sample.biom.pr2 <- paste0(devdir, "data/18s.emu.all_json_md.biom")
sample.tree.pr2 <- paste0(devdir, "data/18s.emu.tree.nwk")
biom.pr2 <- import_biom(sample.biom.pr2, treefilename = sample.tree.pr2)
colnames(tax_table(biom.pr2)) <- c("Domain", "Subdivision", "Class", "Order", "Family", "Genus", "Species")
sampleid.pr2 <- sample_names(biom.pr2)
labels.biom.pr2 <- sample_data(biom.pr2)$Label
rank_names(biom.pr2)
## [1] "Domain"      "Subdivision" "Class"       "Order"       "Family"     
## [6] "Genus"       "Species"
sample_variables(biom.pr2)
## [1] "BarcodeSequence" "Culture_setup"   "Inoculum"        "Label"          
## [5] "PCR_amplicon"    "Primer"          "ProjectID"       "Time_point"
sample_names(biom.pr2)
##  [1] "flaregas-1"  "flaregas-2"  "flaregas-3"  "flaregas-4"  "flaregas-5" 
##  [6] "flaregas-6"  "flaregas-7"  "flaregas-8"  "flaregas-9"  "flaregas-10"
## [11] "flaregas-11" "flaregas-12" "flaregas-13" "flaregas-14" "flaregas-15"
## [16] "flaregas-16" "flaregas-17" "flaregas-18" "flaregas-19" "flaregas-20"
## [21] "flaregas-21" "flaregas-22" "flaregas-23" "flaregas-24" "flaregas-25"
## [26] "flaregas-26" "flaregas-27" "flaregas-28" "flaregas-29" "flaregas-30"
knitr::kable(as.matrix(sample_data(biom.pr2)))
BarcodeSequence Culture_setup Inoculum Label PCR_amplicon Primer ProjectID Time_point
flaregas-1 BC01 ocean Helsingor H-LC-t0 16S 27F+1492R FAS64881 0
flaregas-2 BC02 light+clean_gas Helsingor H-LC-t1 16S 27F+1492R FAS64881 7
flaregas-3 BC03 light+clean_gas Helsingor H-LC-t3 16S 27F+1492R FAS64881 14
flaregas-4 BC04 ocean Helsingor H-LC-t0 18S ss5+ss3 FAS64881 0
flaregas-5 BC05 light+clean_gas Helsingor H-LC-t1 18S ss5+ss3 FAS64881 7
flaregas-6 BC06 light+clean_gas Helsingor H-LC-t3 18S ss5+ss3 FAS64881 14
flaregas-7 BC07 ocean Helsingor H-LC-t0 18S EukA+EukB FAS64881 0
flaregas-8 BC08 light+clean_gas Helsingor H-LC-t1 18S EukA+EukB FAS64881 7
flaregas-9 BC09 light+clean_gas Helsingor H-LC-t3 18S EukA+EukB FAS64881 14
flaregas-10 BC10 ocean Helsingor H-LC-t0 16S+18S 27F+1492R++EukA+EukB FAS64881 0
flaregas-11 BC01 ocean Hornbaek-deep Deep-DC-t0 16S+18S 27F+1492R++EukA+EukB FAS65981 0
flaregas-12 BC02 ocean Hornbaek-deep Deep-DC-t0 16S+18S 27F+1492R++EukA+EukB FAS65981 0
flaregas-13 BC03 light+clean_gas Hornbaek-deep Deep-LC-t1 16S+18S 27F+1492R++EukA+EukB FAS65981 5
flaregas-14 BC04 light+clean_gas Hornbaek-deep Deep-LC-t2 16S+18S 27F+1492R++EukA+EukB FAS65981 7
flaregas-15 BC05 light+clean_gas Hornbaek-deep Deep-LC-t3 16S+18S 27F+1492R++EukA+EukB FAS65981 11
flaregas-16 BC06 light+clean_gas Hornbaek-deep Deep-LC-t3 16S+18S 27F+1492R++EukA+EukB FAS65981 11
flaregas-17 BC07 light+used_gas Hornbaek-deep Deep-LU-t1 16S+18S 27F+1492R++EukA+EukB FAS65981 5
flaregas-18 BC08 light+used_gas Hornbaek-deep Deep-LU-t2 16S+18S 27F+1492R++EukA+EukB FAS65981 7
flaregas-19 BC09 light+used_gas Hornbaek-deep Deep-LU-t3 16S+18S 27F+1492R++EukA+EukB FAS65981 11
flaregas-20 BC10 light+used_gas Hornbaek-deep Deep-LU-t3 16S+18S 27F+1492R++EukA+EukB FAS65981 11
flaregas-21 BC11 dark+clean_gas Hornbaek-deep Deep-DC-t1 16S+18S 27F+1492R++EukA+EukB FAS64830 5
flaregas-22 BC12 dark+clean_gas Hornbaek-deep Deep-DC-t2 16S+18S 27F+1492R++EukA+EukB FAS64830 7
flaregas-23 BC13 dark+clean_gas Hornbaek-deep Deep-DC-t3 16S+18S 27F+1492R++EukA+EukB FAS64830 11
flaregas-24 BC14 dark+clean_gas Hornbaek-deep Deep-DC-t3 16S+18S 27F+1492R++EukA+EukB FAS64830 11
flaregas-25 BC15 ocean Hornbaek-shallow Shallow-DC-t0 16S+18S 27F+1492R++EukA+EukB FAS64830 0
flaregas-26 BC16 ocean Hornbaek-shallow Shallow-DC-t0 16S+18S 27F+1492R++EukA+EukB FAS64830 0
flaregas-27 BC17 dark+clean_gas Hornbaek-shallow Shallow-DC-t1 16S+18S 27F+1492R++EukA+EukB FAS64830 5
flaregas-28 BC18 dark+clean_gas Hornbaek-shallow Shallow-DC-t2 16S+18S 27F+1492R++EukA+EukB FAS64830 7
flaregas-29 BC19 dark+clean_gas Hornbaek-shallow Shallow-DC-t3 16S+18S 27F+1492R++EukA+EukB FAS64830 11
flaregas-30 BC20 dark+clean_gas Hornbaek-shallow Shallow-DC-t3 16S+18S 27F+1492R++EukA+EukB FAS64830 11

Preprocessing data

Subset eukaryotic microbes (18S)

#remove Mitochondria (Eukaryota:mito::kingdom level), Metazoa, Ichthyosporea, Choanoflagellata, and Fungi (all phylum level) 
biom.filter.pr2 <- subset_taxa(biom.pr2, Domain!="Eukaryota:mito")
biom.filter.pr2 <- subset_taxa(biom.filter.pr2, Subdivision!="Metazoa")
biom.filter.pr2 <- subset_taxa(biom.filter.pr2, Subdivision!="Ichthyosporea")
biom.filter.pr2 <- subset_taxa(biom.filter.pr2, Subdivision!="Choanoflagellata")
biom.filter.pr2 <- subset_taxa(biom.filter.pr2, Subdivision!="Fungi")

#get eukaryotes (18S)
biom.18s <- subset_taxa(biom.filter.pr2, Domain=="Eukaryota")

#remove 16s specific samples
biom.18s <- prune_samples(!(sample_names(biom.18s)) %in% c("flaregas-1","flaregas-2","flaregas-3"), biom.18s)
sampleid.18s <- sample_names(biom.18s)
label.18s <- sample_data(biom.18s)$Label

meta <- rownames_to_column(as.data.frame(as.matrix(biom.18s@sam_data))) %>% dplyr::rename("Sample"="rowname")

##convert counts to integers for alpha diversity measures
biom.18s.int <- biom.18s
otu_table(biom.18s.int) <- otu_table(matrix(as.integer(otu_table(biom.18s)), ncol = nsamples(biom.18s), dimnames = list(taxa_names(biom.18s), sample_names(biom.18s))), taxa_are_rows = taxa_are_rows(biom.18s))

Prevalence filtering

Remove all taxa that only occur in a single sample

biom.18s
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1472 taxa and 27 samples ]
## sample_data() Sample Data:       [ 27 samples by 8 sample variables ]
## tax_table()   Taxonomy Table:    [ 1472 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1472 tips and 1424 internal nodes ]
#tax_stats(biom.18s)
# Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(biom.18s),
              MARGIN = ifelse(taxa_are_rows(biom.18s), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})
prevdf = data.frame(Prevalence = prev0, TotalAbundance = taxa_sums(biom.18s), tax_table(biom.18s))
ggplot(prevdf, aes(Prevalence)) + geom_bar()

# Define prevalence threshold as 5% of total samples
prevalenceThreshold = 0.05 * nsamples(biom.18s)
prevalenceThreshold
## [1] 1.35
# Execute prevalence filter, using `prune_taxa()` function
biom.18s = prune_taxa((prev0 > prevalenceThreshold), biom.18s)
biom.18s
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1017 taxa and 27 samples ]
## sample_data() Sample Data:       [ 27 samples by 8 sample variables ]
## tax_table()   Taxonomy Table:    [ 1017 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1017 tips and 990 internal nodes ]
#tax_stats(biom.18s)

Normalization and transformation

Relative abundance

# Transform to relative abundance. Save as new object.
biom.18s.rel <- biom.18s %>% transform_sample_counts(function(x) {x / sum(x)})
#alternatively run: biom.18s.rel <- microbiome::transform(biom.18s, "compositional")

# visualize relative abundance distribution
d.rel.melt <- melt(otu_table(biom.18s.rel), id.vars = colnames(otu_table(biom.18s.rel))) %>% dplyr::rename("Sample" = "Var2", "Relative abundance" = "value")
d.rel.melt <- inner_join(d.rel.melt, meta, by = "Sample")
pdf(paste0(wdir,"fig/rel_abundance-distr-18s.pdf"), width = 14, height = 10)
ggplot(d.rel.melt, aes(`Relative abundance`, Sample, fill=Inoculum)) + geom_boxplot(outliers = F) + coord_flip() + facet_wrap(~ Culture_setup, ncol = 2, scales = "free_x") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
dev.off()
## png 
##   2

Centered log-ratio transformation

# load the library zCompositions to perform 0 replacement
library(zCompositions)

# we are using the Count Zero Multiplicative approach
# z.warning is threshold used to identify individual rows or columns including an excess of zeros/unobserved values
d.n0 <- cmultRepl(otu_table(biom.18s), method="CZM", label=0, z.warning=0.99)
## No. adjusted imputations:  18593
dim(d.n0)
## [1] 1017   27
# generate the centered log-ratio transformed data
# samples by row
d.clr <- apply(d.n0, 2, function(x) log(x) - mean(log(x)))
biom.18s.clr <- biom.18s
otu_table(biom.18s.clr) <- otu_table(d.clr, taxa_are_rows = TRUE)

# check if microbiome::transform gives us the same
#pseq.comp <- microbiome::transform(biom.18s, "clr")
#d.clr.melt <- melt(otu_table(pseq.comp), id.vars = colnames(otu_table(pseq.comp))) %>% dplyr::rename("Sample" = "Var2", "clr" = "value")

# visualize clr distribution
d.clr.melt <- melt(otu_table(biom.18s.clr), id.vars = colnames(otu_table(biom.18s.clr))) %>% dplyr::rename("Sample" = "Var2", "clr" = "value")
d.clr.melt <- inner_join(d.clr.melt, meta, by = "Sample")
pdf(paste0(wdir,"fig/clr-distr-18s.pdf"), width = 14, height = 10)
ggplot(d.clr.melt, aes(clr, Sample, fill=Inoculum)) + geom_boxplot(outliers = F) + coord_flip() + facet_wrap(~ Culture_setup, ncol = 2, scales = "free_x") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
dev.off()
## png 
##   2

Most abundant genera

Maximal relative abundances (>3%) for each Culture setup aggregated for each genus

  temp.18s <- merge_samples(biom.18s, "Culture_setup") %>% tax_glom(taxrank = "Genus") %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt() %>% filter(Abundance>0.03) %>% arrange(Genus) %>% dplyr::arrange(desc(Abundance))
  knitr::kable(as.matrix(temp.18s[,c(2,3,13:17)]))
Sample Abundance Subdivision Class Order Family Genus
light+clean_gas 0.44084499 Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Picochlorum
light+used_gas 0.39396842 Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Picochlorum
light+used_gas 0.22031143 Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Nannochloris
light+clean_gas 0.19163245 Chlorophyta_X Chlorophyceae Chlamydomonadales Chlamydomonadales_X Chlamydomonas
dark+clean_gas 0.18925261 Ciliophora Spirotrichea Euplotia Aspidiscidae Aspidisca
dark+clean_gas 0.17843859 Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Picochlorum
light+clean_gas 0.16703950 Chlorophyta_X Chlorodendrophyceae Chlorodendrales Chlorodendraceae Tetraselmis
dark+clean_gas 0.15025249 Ciliophora Spirotrichea Choreotrichida Strobilidiidae Rimostrombidium
light+used_gas 0.13584400 Chlorophyta_X Chlorodendrophyceae Chlorodendrales Chlorodendraceae Tetraselmis
ocean 0.12174715 Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Picochlorum
light+used_gas 0.11292061 Chlorophyta_X Chlorophyceae Chlamydomonadales Chlamydomonadales_X Chlamydomonadales_XX
ocean 0.07339914 Dinoflagellata Dinophyceae Peridiniales Heterocapsaceae Heterocapsa
ocean 0.07308266 Ciliophora Phyllopharyngea Cyrtophoria_5 Chlamydodontidae Chlamydodon
dark+clean_gas 0.06198952 Chlorophyta_X Chlorophyceae Chlamydomonadales Chlamydomonadales_X Chlamydomonas
ocean 0.05883358 Dinoflagellata Dinophyceae Gymnodiniales Gymnodiniaceae Lepidodinium
dark+clean_gas 0.05873322 Gyrista Chrysophyceae Ochromonadales Ochromonadaceae Spumella
light+clean_gas 0.05840299 Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Nannochloris
dark+clean_gas 0.05690629 Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Nannochloris
dark+clean_gas 0.04818719 Ciliophora Oligohymenophorea Scuticociliatia_1 Philasterida Philasterida_X
light+clean_gas 0.04769889 Bigyra Bicoecea Bicoecea_X Bicoecea_XX Bicoecea_XXX
dark+clean_gas 0.04372455 Gyrista Chrysophyceae Paraphysomonadales Paraphysomonadaceae Paraphysomonas
light+used_gas 0.04206976 Bigyra Bicoecea Bicoecea_X Bicoecea_XX Bicoecea_XXX
ocean 0.03399117 Ciliophora Spirotrichea Choreotrichida Strobilidiidae Rimostrombidium
dark+clean_gas 0.03197411 Cercozoa Filosa-Granofilosea Cryptofilida Novel-Gran-2 Novel-Gran-2_X
dark+clean_gas 0.03153655 Bigyra Bicoecea Anoecales Caecitellaceae Caecitellus
ocean 0.03115222 Dinoflagellata Syndiniales Dino-Group-II Dino-Group-II-Clade-2 Dino-Group-II-Clade-2_X
light+used_gas 0.03114172 Chlorophyta_X Chlorodendrophyceae Chlorodendrales Chlorodendraceae Chlorodendrales_XX

Community typing with Dirichlet Multinomial Mixtures

set.seed(1)

#https://microbiome.github.io/tutorials/DMM.html
library(DirichletMultinomial)

# prefilter relative abundances for prevalent taxa
biom.18s.rel.genus <- biom.18s.rel %>% tax_glom(taxrank = "Genus")
biom.18s.genus <- biom.18s %>% tax_glom(taxrank = "Genus")
taxa <- core_members(biom.18s.rel.genus, detection = 1/200, prevalence = 2/27) #table(meta$Culture_setup) -> minimum no. samples of ocean~Hornbaek-deep: 6
pseq <- prune_taxa(taxa, biom.18s.genus)
dat <- abundances(pseq)
#rownames(dat) <- paste(tax_table(pseq)[,"Genus"], tax_table(pseq)[,"Species"], sep = "::")
rownames(dat) <- tax_table(pseq)[,"Genus"]
count <- as.matrix(t(dat))

# fit the DMM model
fit <- lapply(1:7, dmn, count = count, verbose=TRUE, seed = 1234567)
## dmn, k=1
##   Soft kmeans
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=2
##   Soft kmeans
##     iteration 10 change 0.002046
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=3
##   Soft kmeans
##     iteration 10 change 0.029834
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=4
##   Soft kmeans
##     iteration 10 change 0.001305
##     iteration 20 change 0.000021
##   Expectation Maximization setup
##   Expectation Maximization
##     iteration 10 change 0.000000
##   Hessian
## dmn, k=5
##   Soft kmeans
##     iteration 10 change 0.058220
##     iteration 20 change 0.000001
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=6
##   Soft kmeans
##     iteration 10 change 0.002857
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
## dmn, k=7
##   Soft kmeans
##     iteration 10 change 0.002111
##   Expectation Maximization setup
##   Expectation Maximization
##   Hessian
# Check model fit with different number of mixture components using standard information criteria
lplc <- base::sapply(fit, DirichletMultinomial::laplace) # AIC / BIC / Laplace
aic  <- base::sapply(fit, DirichletMultinomial::AIC) # AIC / BIC / Laplace
bic  <- base::sapply(fit, DirichletMultinomial::BIC) # AIC / BIC / Laplace
plot(lplc, type="b", xlab="Number of Dirichlet Components", ylab="Model Fit", ylim=c(min(c(lplc,aic,bic), na.rm = T), max(c(lplc,aic,bic), na.rm = T))); lines(aic, type="b", lty = 2); lines(bic, type="b", lty = 3)

# pick the optimal model
best <- fit[[which.min(unlist(lplc))]]

# mixture parameters pi and theta
mixturewt(best)
##          pi    theta
## 1 0.4444444 16.47541
## 2 0.2962963 15.38982
## 3 0.2592593 39.73563
# sample-component assignments
ass <- apply(mixture(best), 1, which.max)
temp.meta <- meta
temp.meta$cluster <- ass
knitr::kable(as.matrix(temp.meta %>% dplyr::select(Culture_setup, Inoculum, Time_point, cluster)))
Culture_setup Inoculum Time_point cluster
ocean Helsingor 0 3
light+clean_gas Helsingor 7 1
light+clean_gas Helsingor 14 1
ocean Helsingor 0 3
light+clean_gas Helsingor 7 1
light+clean_gas Helsingor 14 1
ocean Helsingor 0 3
ocean Hornbaek-deep 0 3
ocean Hornbaek-deep 0 3
light+clean_gas Hornbaek-deep 5 1
light+clean_gas Hornbaek-deep 7 1
light+clean_gas Hornbaek-deep 11 1
light+clean_gas Hornbaek-deep 11 1
light+used_gas Hornbaek-deep 5 1
light+used_gas Hornbaek-deep 7 1
light+used_gas Hornbaek-deep 11 1
light+used_gas Hornbaek-deep 11 1
dark+clean_gas Hornbaek-deep 5 2
dark+clean_gas Hornbaek-deep 7 2
dark+clean_gas Hornbaek-deep 11 2
dark+clean_gas Hornbaek-deep 11 2
ocean Hornbaek-shallow 0 3
ocean Hornbaek-shallow 0 3
dark+clean_gas Hornbaek-shallow 5 2
dark+clean_gas Hornbaek-shallow 7 2
dark+clean_gas Hornbaek-shallow 11 2
dark+clean_gas Hornbaek-shallow 11 2
# Contribution of each taxonomic group to each component
for (k in seq(ncol(fitted(best)))) {
  d <- melt(fitted(best))
  colnames(d) <- c("OTU", "cluster", "value")
  d <- subset(d, cluster == k) %>%
     # Arrange OTUs by assignment strength
     arrange(value) %>%
     mutate(OTU = factor(OTU, levels = unique(OTU))) %>%
     # Only show the most important drivers
     filter(abs(value) > quantile(abs(value), 0.8))     

  p <- ggplot(d, aes(x = OTU, y = value)) +
       geom_bar(stat = "identity") +
       coord_flip() +
       labs(title = paste("Top drivers: community type", k))
  print(p)
}

Preprocess minimap2 abundancies

Different statistical methods (alpha and beta diversity, DEA) require raw read counts instead of probabilities as calculated by Emu.

#get minimap2 filtered raw counts
sample.biom <- paste0(devdir, "data/18s.minimap2.all_json_md.biom")
sample.tree <- paste0(devdir, "data/18s.minimap2.tree.nwk")
biom.m <- import_biom(sample.biom, treefilename = sample.tree)
colnames(tax_table(biom.m)) <- c("Domain", "Supergroup", "Division", "Subdivision", "Class", "Order", "Family", "Genus", "Species")
tax.m <- data.frame(tax_table(biom.m))
tax.clean.m <- data.frame(row.names = row.names(tax.m),
                        Domain = str_replace(tax.m[,1], "D_0__", ""),
                        Supergroup = str_replace(tax.m[,2], "D_1__", ""),
                        Division = str_replace(tax.m[,3], "D_2__", ""),
                        Subdivision = str_replace(tax.m[,4], "D_3__", ""),
                        Class = str_replace(tax.m[,5], "D_4__", ""),
                        Order = str_replace(tax.m[,6], "D_5__", ""),
                        Family = str_replace(tax.m[,7], "D_6__", ""),
                        Genus = str_replace(tax.m[,8], "D_7__", ""),
                        Species = str_replace(tax.m[,9], "D_8__", ""),
                        stringsAsFactors = FALSE)
tax_table(biom.m) <- as.matrix(tax.clean.m)
biom.m <- fix_duplicate_tax(biom.m)

#filter for eukaryotes
biom.filter.pr2.m <- subset_taxa(biom.m, Domain!="Eukaryota:mito")
biom.filter.pr2.m <- subset_taxa(biom.filter.pr2.m, Subdivision!="Metazoa")
biom.filter.pr2.m <- subset_taxa(biom.filter.pr2.m, Subdivision!="Ichthyosporea")
biom.filter.pr2.m <- subset_taxa(biom.filter.pr2.m, Subdivision!="Choanoflagellata")
biom.filter.pr2.m <- subset_taxa(biom.filter.pr2.m, Subdivision!="Fungi")
biom.18s.m <- subset_taxa(biom.filter.pr2.m, Domain=="Eukaryota")
biom.18s.m <- prune_samples(!(sample_names(biom.18s.m)) %in% c("flaregas-1","flaregas-2","flaregas-3"), biom.18s.m)

#preprocess phyloseq object
biom.18s.m.unfiltered <- biom.18s.m
prev0.m = apply(X = otu_table(biom.18s.m),
              MARGIN = ifelse(taxa_are_rows(biom.18s.m), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})
prevdf.m = data.frame(Prevalence = prev0.m, TotalAbundance = taxa_sums(biom.18s.m), tax_table(biom.18s.m))
ggplot(prevdf.m, aes(Prevalence)) + geom_bar()

prevalenceThreshold.m = 0.05 * nsamples(biom.18s.m)
biom.18s.m = prune_taxa((prev0.m > prevalenceThreshold.m), biom.18s.m)
biom.18s.m
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5594 taxa and 27 samples ]
## sample_data() Sample Data:       [ 27 samples by 8 sample variables ]
## tax_table()   Taxonomy Table:    [ 5594 taxa by 9 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 5594 tips and 5309 internal nodes ]
#tax_stats(biom.18s.m)

#centered log-ratio transformation
d.n0 <- cmultRepl(otu_table(biom.18s.m), method="CZM", label=0, z.warning=0.99)
## No. adjusted imputations:  115831
dim(d.n0)
## [1] 5594   27
d.clr <- apply(d.n0, 2, function(x) log(x) - mean(log(x)))
biom.18s.m.clr <- biom.18s.m
otu_table(biom.18s.m.clr) <- otu_table(d.clr, taxa_are_rows = TRUE)

#transform to relative abundance
biom.18s.m.rel <- biom.18s.m %>% transform_sample_counts(function(x) {x / sum(x)})

#genus
#get genus counts
biom.18s.m.genus <- biom.18s.m %>% tax_glom(taxrank = "Genus")

#filter genera that exist also running Emu on minimap2 counts
biom.18s.genus.minimap2emu <- tax_table(biom.18s.m.genus) %>% as.data.frame() %>% dplyr::select(Genus) %>% rownames_to_column(var = "SpeciesID") %>% inner_join(tax_table(biom.18s.genus) %>% as.data.frame() %>% dplyr::select(Genus), by = "Genus")

Alpha diversity measure

This should not be calculated with low abundancy removed counts (so here we should use minimap2 abundancies).

  • Observed .. counts up the number of different taxa you observe in a sample at a given taxonomic level
  • Chao0 .. species richness (similar to observed)
  • Shannon .. estimator for both species richness and evenness; higher means higher uncertainty of predicting which species you would see next if you were to look at another read from this sample
  • Simpson .. Gini-Simpson Index; similar idea to Shannon; based on the probability that two entities (microbes, or reads) taken from the sample at random are of different types

Sample richness in the different culture setups and different time points (emu)

#https://docs.onecodex.com/en/articles/4136553-alpha-diversity
alpha_meas = c("Observed", "Chao1", "Shannon", "Simpson")

p <- plot_richness(biom.18s.int, "Culture_setup", "Time_point", measures=alpha_meas)
pdf(paste0(wdir, "fig/alpha-diversity-cultures-18s-emu.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Culture_setup, y=value, color=NULL), alpha=0.1)
dev.off()
#boxplot_alpha(biom.18s.int, index = "chao1", x_var = "Culture_setup") + theme_minimal() + theme(legend.position = "none")

#https://microbiome.github.io/tutorials/Alphadiversity.html
tab <-microbiome::alpha(biom.18s.int, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
knitr::kable(as.matrix(tab))
#show the dominant taxonomic group for each sample
temp <- sample_data(biom.18s.int)
temp$dominant <- tax_table(biom.18s.int)[dominant(biom.18s.int),"Species"]
knitr::kable(as.matrix(temp[,c("Culture_setup","dominant")]))

p <- plot_richness(biom.18s.int, "Time_point", "Culture_setup", measures=alpha_meas)
p$data$Time_point <- factor(p$data$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/alpha-diversity-times-18s-emu.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Time_point, y=value, color=NULL), alpha=0.1)
dev.off()
#boxplot_alpha(biom.18s.int, index = "chao1", x_var = "Time_point") + theme_minimal() + theme(legend.position = "none")

Sample richness in the different culture setups and different time points (minimap2)

alpha_meas = c("Observed", "Chao1", "Shannon", "Simpson")

p <- plot_richness(biom.18s.m.unfiltered, "Culture_setup", "Time_point", measures=alpha_meas)
pdf(paste0(wdir, "fig/alpha-diversity-cultures-18s-minimap2.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Culture_setup, y=value, color=NULL), alpha=0.1)
dev.off()
#boxplot_alpha(biom.18s.m.unfiltered, index = "chao1", x_var = "Culture_setup") + theme_minimal() + theme(legend.position = "none")

#https://microbiome.github.io/tutorials/Alphadiversity.html
tab <-microbiome::alpha(biom.18s.m.unfiltered, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
knitr::kable(as.matrix(tab))
#show the dominant taxonomic group for each sample
temp <- sample_data(biom.18s.m.unfiltered)
temp$dominant <- tax_table(biom.18s.m.unfiltered)[dominant(biom.18s.m.unfiltered),"Species"]
knitr::kable(as.matrix(temp[,c("Culture_setup","dominant")]))

p <- plot_richness(biom.18s.m.unfiltered, "Time_point", "Culture_setup", measures=alpha_meas)
p$data$Time_point <- factor(p$data$Time_point, levels = c("0","5","7","11","14"))
pdf(paste0(wdir, "fig/alpha-diversity-times-18s-minimap2.pdf"), width = 20)
p + geom_boxplot(data=p$data, aes(x=Time_point, y=value, color=NULL), alpha=0.1)
dev.off()
#boxplot_alpha(biom.18s.m.unfiltered, index = "chao1", x_var = "Time_point") + theme_minimal() + theme(legend.position = "none")

Beta diversity measure

This should not be calculated with low abundancy removed counts (so here we should use minimap2 abundancies).

PCA (emu)

# apply a singular value decomposition to the dataset
# do not use princomp function in R!!
pcx <- prcomp(t(otu_table(biom.18s.clr)), scale. = T)

# generate a scree plot
#http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/
fviz_eig(pcx, addlabels = TRUE)

# generate biplot (check https://colorbrewer2.org/ for selecting colors)
pdf(paste0(wdir,"fig/clr-pca-18s-emu.pdf"), width = 30, height = 8)
p1 <- ggbiplot(pcx, groups = meta$Culture_setup, var.axes = F, choices = c(1,2), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_brewer(type = "div", palette = 1) + labs(color = "Group")
p2 <- ggbiplot(pcx, groups = meta$Culture_setup, var.axes = F, choices = c(3,4), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_brewer(type = "div", palette = 1) + labs(color = "Group")
plot(ggarrange(p1, p2, ncol = 2, nrow = 1))
dev.off()

PCA (minimap2)

#biplot
pcx <- prcomp(t(otu_table(biom.18s.m.clr)), scale. = T)
fviz_eig(pcx, addlabels = TRUE)

# generate biplot (check https://colorbrewer2.org/ for selecting colors)
p1 <- ggbiplot(pcx, groups = meta$Culture_setup, var.axes = F, choices = c(1,2), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_brewer(type = "div", palette = 1) + labs(color = "Group")
p2 <- ggbiplot(pcx, groups = meta$Time_point, var.axes = F, choices = c(1,2), obs.scale = 1, var.scale = 1, ellipse = T, circle = T, ellipse.fill = F, labels = meta$Label, labels.size = 5) + scale_color_manual(values=c('#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00')) + labs(color = "Group")
pdf(paste0(wdir,"fig/clr-pca-18s-minimap2.pdf"), width = 30, height = 8)
plot(ggarrange(p1, p2, ncol = 2, nrow = 1))
## Too few points to calculate an ellipse
dev.off()


Hierarchical Clustering

Unifrac (emu)

# average-link clustering
# Use unifrac on original compositional data: If your data is truly compositional (relative abundances sum to 1), consider using unifrac directly on the untransformed data
biom.hclust <- hclust(phyloseq::distance(biom.18s, method = "unifrac"), method="average")
biom.hclust$labels <- sample_data(biom.18s)$Label
plot(biom.hclust)

#biom.hclust <- hclust(phyloseq::distance(biom.18s, "wunifrac"), method="average")
#biom.hclust$labels <- sample_data(biom.18s)$Label
#plot(biom.hclust)
#biom.hclust <- hclust(phyloseq::distance(biom.18s, "bray"), method="average")
#biom.hclust$labels <- sample_data(biom.18s)$Label
#plot(biom.hclust)

Unifrac (minimap2)

# average-link clustering
# Use unifrac on original compositional data: If your data is truly compositional (relative abundances sum to 1), consider using unifrac directly on the untransformed data
biom.hclust <- hclust(phyloseq::distance(biom.18s.m, method = "unifrac"), method="average")
biom.hclust$labels <- sample_data(biom.18s)$Label
plot(biom.hclust)

Multivariate community comparison

PERMANOVA

This should not be calculated with low abundancy removed counts (so here we should use minimap2 abundancies).

#https://microbiome.github.io/tutorials/PERMANOVA.html
plot_landscape(biom.18s.m.rel, method = "NMDS", distance = "bray", col = "Culture_setup", size = 3)

plot_landscape(biom.18s.m.rel, method = "NMDS", distance = "bray", col = "Time_point", size = 3)

##https://deneflab.github.io/MicrobeMiseq/demos/mothur_2_phyloseq.html
set.seed(1)

# Calculate bray curtis distance matrix
biom.18s.dist <- phyloseq::distance(biom.18s.m, method = "bray")
#biom.18s.dist <- phyloseq::distance(biom.18s.m, method = "unifrac")
#biom.18s.dist <- phyloseq::distance(biom.18s.m, method = "wunifrac")

# make a data frame from the sample_data
#sampledf <- data.frame(sample_data(biom.18s.m))
sampledf <- meta(biom.18s.m)

#test the hypothesis that the different culture setups have different centroids
# Adonis test (H0 .. different cultures have the same centroid)
perm <- adonis2(biom.18s.dist ~ Culture_setup, data = sampledf)
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = biom.18s.dist ~ Culture_setup, data = sampledf)
##               Df SumOfSqs      R2      F Pr(>F)    
## Culture_setup  3   3.4497 0.44175 6.0667  0.001 ***
## Residual      23   4.3595 0.55825                  
## Total         26   7.8093 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogeneity of dispersion test (H0 .. different cultures have the same dispersion)
beta <- betadisper(biom.18s.dist, sampledf$Culture_setup)
permutest(beta) #alternatively use anova(beta)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     3 0.17440 0.058133 2.1949    999  0.101
## Residuals 23 0.60917 0.026486
#test the hypothesis that the different time points have different centroids
# Adonis test (H0 .. different different time points have the same centroid)
perm <- adonis2(biom.18s.dist ~ Time_point, data = sampledf)
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = biom.18s.dist ~ Time_point, data = sampledf)
##            Df SumOfSqs      R2      F Pr(>F)    
## Time_point  4   3.1961 0.40927 3.8105  0.001 ***
## Residual   22   4.6132 0.59073                  
## Total      26   7.8093 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogeneity of dispersion test (H0 .. different time points have the same dispersion)
beta <- betadisper(biom.18s.dist, sampledf$Time_point)
permutest(beta)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     4 0.18157 0.045392 1.1704    999  0.346
## Residuals 22 0.85325 0.038784

Univariate community comparisons

ANOVA-like differential expression of genera (minimap2)

set.seed(12345)
# test differential expression: (light.clean_gas+light.used_gas) - dark.clean_gas
meta.sub.1 <- meta %>% dplyr::filter(Culture_setup == "light+clean_gas" | Culture_setup == "light+used_gas" | Culture_setup == "dark+clean_gas") %>% mutate(Culture_setup = replace(Culture_setup, Culture_setup=="light+used_gas", "light+clean_gas"))
mm <- model.matrix(~ Inoculum + Culture_setup, meta.sub.1)
counts.sub <- otu_table(biom.18s.m.genus)[,meta.sub.1$Sample]
x <- aldex.clr(counts.sub, mm, mc.samples=100, verbose=T, gamma=1e-3)
## integer matrix provided
## using all features for denominator
## operating in serial mode
## removed rows with sums equal to zero
## data format is OK
## dirichlet samples complete
## aldex.scaleSim: adjusting samples to reflect scale uncertainty.
## sampling from the default scale model.
glm.test <- aldex.glm(x, mm, fdr.method='BH')
## running tests for each MC instance:
## |------------(25%)----------(50%)----------(75%)----------|
glm.eff<- aldex.glm.effect(x)
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
aldex.glm.plot(glm.test, eff=glm.eff, contrast='Culture_setuplight+clean_gas', type='MW', test='effect', cutoff.effect = 1)

aldex.glm.plot(glm.test, eff=glm.eff, contrast='Culture_setuplight+clean_gas', type='MA', test='effect', cutoff.effect = 1)

#aldex.glm.plot(glm.test, eff=glm.eff, contrast='Culture_setuplight+clean_gas', type='volcano', test='effect', cutoff.effect = 1)

#filter genera that where detected with Emu (filter low abundant genera)
glm.eff.lightVSdark <- glm.eff$`Culture_setuplight+clean_gas`[biom.18s.genus.minimap2emu$SpeciesID,]
glm.eff.lightVSdark <- glm.eff.lightVSdark %>% dplyr::filter(rab.all!="NA")

# higher relative abundance under light     
glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1.5,] %>% nrow()
## [1] 3
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1.5,] %>% row.names(),]))
Domain Supergroup Division Subdivision Class Order Family Genus Species
DQ079859.1.1794_U Eukaryota Haptista Haptophyta Haptophyta_X Prymnesiophyceae Isochrysidales Isochrysidaceae Tisochrysis NA
KY054995.1.1744_U Eukaryota Archaeplastida Chlorophyta Chlorophyta_X Chlorodendrophyceae Chlorodendrales Chlorodendraceae Tetraselmis NA
EF527133.16.1840_U Eukaryota Archaeplastida Chlorophyta Chlorophyta_X Chlorodendrophyceae Chlorodendrales Chlorodendraceae Chlorodendrales_XX NA
# higher relative abundance in the dark
glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1.5,] %>% nrow()
## [1] 11
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1.5,] %>% row.names(),]))
Domain Supergroup Division Subdivision Class Order Family Genus Species
MN945085.1.1704_U Eukaryota TSAR Stramenopiles Gyrista Chrysophyceae Ochromonadales Ochromonadaceae Pedospumella NA
EF633325.1.1748_U Eukaryota TSAR Stramenopiles Gyrista Chrysophyceae Ochromonadales Ochromonadales_X Ochromonadales_XX NA
JQ782092.1.1744_U Eukaryota TSAR Stramenopiles Gyrista Chrysophyceae Ochromonadales Ochromonadaceae Spumella NA
AY620366.1.1247_U Eukaryota TSAR Rhizaria Cercozoa Filosa-Granofilosea Cryptofilida Novel-Gran-2 Novel-Gran-2_X NA
DQ388459.1.1722_U Eukaryota TSAR Alveolata Dinoflagellata Dinophyceae Prorocentrales Prorocentraceae Prorocentrum NA
EF024169.1.1808_U Eukaryota TSAR Rhizaria Cercozoa Filosa-Granofilosea Filosa-Granofilosea_X Novel-Gran-6 Novel-Gran-6_X NA
KU363283.1.1461_U Eukaryota TSAR Alveolata Ciliophora Oligohymenophorea Peritrichia_2 Sessilida Sessilida_X NA
DQ504338.1.1561_U Eukaryota TSAR Alveolata Ciliophora Spirotrichea Euplotia Aspidiscidae Aspidisca NA
AF271998.1.1795_U Eukaryota Obazoa Opisthokonta Filasterea Filasterea_X Ministerida Ministeridae Ministeria NA
MF197366.1.2097_U Eukaryota Amoebozoa Discosea Discosea_X Flabellinia Dactylopodida Paramoebidae Paramoeba NA
AY277797.1.1861_U Eukaryota Amoebozoa Tubulinea Tubulinea_X Elardia Leptomyxida Flabellulidae Paraflabellula NA
temp.1 <- glm.eff.lightVSdark[abs(glm.eff.lightVSdark$effect) >= 1.5,] %>% rownames_to_column(var = "SpeciesID")
temp.2 <- tax_table(biom.18s.m.genus) %>% as.data.frame() %>% rownames_to_column(var = "SpeciesID")
res_sig.aldex <- inner_join(temp.1, temp.2, by = "SpeciesID")
pdf(paste0(wdir, "fig/aldex-light_clean_usedVSdark-scatter-genus-18s.pdf"), width = 35, height = 13)
ggplot(res_sig.aldex, aes(x=Genus, y=diff.btw, color=Subdivision)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) + ylab("logFC")
dev.off()
## png 
##   2
for(i in glm.eff.lightVSdark[abs(glm.eff.lightVSdark$effect) >= 1,] %>% row.names()) {
  de.genus <- tax_table(biom.18s.m.genus)[i,"Genus"]
  p <- boxplot_abundance(biom.18s.m.genus, x = "Culture_setup", y = i) + scale_y_log10() + ggtitle(de.genus)
  print(p)
}

set.seed(12345)
# test differential expression: ((light.clean_gas+light.used_gas) - ocean) AND (dark.clean_gas - ocean)
meta.sub.1 <- meta %>% mutate(Culture_setup = replace(Culture_setup, Culture_setup=="light+used_gas", "light+clean_gas"))
meta.sub.1$Culture_setup <- factor(meta.sub.1$Culture_setup, levels = c("ocean", "dark+clean_gas", "light+clean_gas"))
mm <- model.matrix(~0 + Inoculum + Culture_setup, meta.sub.1)
counts.sub <- otu_table(biom.18s.m.genus)[,meta.sub.1$Sample]
x <- aldex.clr(counts.sub, mm, mc.samples=100, verbose=T, gamma=1e-3)
## integer matrix provided
## using all features for denominator
## operating in serial mode
## removed rows with sums equal to zero
## data format is OK
## dirichlet samples complete
## aldex.scaleSim: adjusting samples to reflect scale uncertainty.
## sampling from the default scale model.
glm.test.1 <- aldex.glm(x, mm, fdr.method='BH')
## running tests for each MC instance:
## |------------(25%)----------(50%)----------(75%)----------|
glm.eff.1<- aldex.glm.effect(x)
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
## operating in serial mode
## sanity check complete
## rab.all  complete
## rab.win  complete
## rab of samples complete
## within sample difference calculated
## between group difference calculated
## group summaries calculated
## unpaired effect size calculated
## summarizing output
aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setuplight+clean_gas', type='MW', test='effect', cutoff.effect = 1.5)

aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setuplight+clean_gas', type='MA', test='effect', cutoff.effect = 1.5)

#aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setuplight+clean_gas', type='volcano', test='fdr', cutoff.pval = 0.1)
aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setupdark+clean_gas', type='MW', test='effect', cutoff.effect = 1.5)

aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setupdark+clean_gas', type='MA', test='effect', cutoff.effect = 1.5)

#aldex.glm.plot(glm.test.1, eff=glm.eff.1, contrast='Culture_setupdark+clean_gas', type='volcano', test='fdr', cutoff.pval = 0.1)

#filter genera that where detected with Emu (filter low abundant genera)
glm.eff.lightVSocean <- glm.eff.1$`Culture_setuplight+clean_gas`[biom.18s.genus.minimap2emu$SpeciesID,]
glm.eff.darkVSocean <- glm.eff.1$`Culture_setupdark+clean_gas`[biom.18s.genus.minimap2emu$SpeciesID,]

# higher relative abundance in light-incubated cultures than in the ocean
glm.eff.lightVSocean[glm.eff.lightVSocean$effect >= 1.5,] %>% nrow()
## [1] 8
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[glm.eff.lightVSocean[glm.eff.lightVSocean$effect >= 1.5,] %>% row.names(),]))
Domain Supergroup Division Subdivision Class Order Family Genus Species
DQ079859.1.1794_U Eukaryota Haptista Haptophyta Haptophyta_X Prymnesiophyceae Isochrysidales Isochrysidaceae Tisochrysis NA
KC875350.1.1077_U Eukaryota Archaeplastida Chlorophyta Chlorophyta_X Chlorophyceae Chlamydomonadales Chlamydomonadales_X Dunaliella NA
JF790981.1.1777_U Eukaryota Archaeplastida Chlorophyta Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Chlorellales_XX NA
AB058309.1.1749_U Eukaryota Archaeplastida Chlorophyta Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Picochlorum NA
AB080302.1.1788_U Eukaryota Archaeplastida Chlorophyta Chlorophyta_X Trebouxiophyceae Chlorellales Chlorellales_X Nannochloris NA
KY054986.1.1708_U Eukaryota TSAR Stramenopiles Gyrista Eustigmatophyceae Eustigmatophyceae_X Eustigmatophyceae_XX Nannochloropsis NA
KY054995.1.1744_U Eukaryota Archaeplastida Chlorophyta Chlorophyta_X Chlorodendrophyceae Chlorodendrales Chlorodendraceae Tetraselmis NA
EF527133.16.1840_U Eukaryota Archaeplastida Chlorophyta Chlorophyta_X Chlorodendrophyceae Chlorodendrales Chlorodendraceae Chlorodendrales_XX NA
# higher relative abundance in dark-incubated cultures than in the ocean
glm.eff.darkVSocean[glm.eff.darkVSocean$effect >= 1.5,] %>% nrow()
## [1] 7
knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[glm.eff.darkVSocean[glm.eff.darkVSocean$effect >= 1.5,] %>% row.names(),]))
Domain Supergroup Division Subdivision Class Order Family Genus Species
MN945085.1.1704_U Eukaryota TSAR Stramenopiles Gyrista Chrysophyceae Ochromonadales Ochromonadaceae Pedospumella NA
EF633325.1.1748_U Eukaryota TSAR Stramenopiles Gyrista Chrysophyceae Ochromonadales Ochromonadales_X Ochromonadales_XX NA
JQ782092.1.1744_U Eukaryota TSAR Stramenopiles Gyrista Chrysophyceae Ochromonadales Ochromonadaceae Spumella NA
AY620366.1.1247_U Eukaryota TSAR Rhizaria Cercozoa Filosa-Granofilosea Cryptofilida Novel-Gran-2 Novel-Gran-2_X NA
EF024169.1.1808_U Eukaryota TSAR Rhizaria Cercozoa Filosa-Granofilosea Filosa-Granofilosea_X Novel-Gran-6 Novel-Gran-6_X NA
DQ504338.1.1561_U Eukaryota TSAR Alveolata Ciliophora Spirotrichea Euplotia Aspidiscidae Aspidisca NA
MF197366.1.2097_U Eukaryota Amoebozoa Discosea Discosea_X Flabellinia Dactylopodida Paramoebidae Paramoeba NA
temp.1 <- glm.eff.lightVSocean[glm.eff.lightVSocean$effect >= 1.5,] %>% rownames_to_column(var = "SpeciesID")
temp.2 <- tax_table(biom.18s.m.genus) %>% as.data.frame() %>% rownames_to_column(var = "SpeciesID")
res_sig.aldex.lightVSocean <- inner_join(temp.1, temp.2, by = "SpeciesID")
pdf(paste0(wdir, "fig/aldex-lightVSocean-scatter-genus-18s.pdf"), width = 35, height = 13)
ggplot(res_sig.aldex.lightVSocean, aes(x=reorder(Genus, diff.btw), y=diff.btw, color=Subdivision)) +
    geom_point(size=3, width = 0.2) + coord_flip() +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) + labs(y = "logFC", x = "Genus")
dev.off()
## png 
##   2
temp.1 <- glm.eff.darkVSocean[glm.eff.darkVSocean$effect >= 1.5, ] %>% rownames_to_column(var = "SpeciesID")
temp.2 <- tax_table(biom.18s.m.genus) %>% as.data.frame() %>% rownames_to_column(var = "SpeciesID")
res_sig.aldex.darkVSocean <- inner_join(temp.1, temp.2, by = "SpeciesID")
pdf(paste0(wdir, "fig/aldex-darkVSocean-scatter-genus-18s.pdf"), width = 35, height = 13)
ggplot(res_sig.aldex.darkVSocean, aes(x=reorder(Genus, diff.btw), y=diff.btw, color=Subdivision)) +
    geom_point(size=3, width = 0.2) + coord_flip() +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) + labs(y = "logFC", x = "Genus")
dev.off()
## png 
##   2

Differential abundance testing of genera with DESeq2 (minimap2)

As DESeq2 expects raw counts we will use here the minimap2 counts instead of emu counts.

#run DESeq2
#https://micca.readthedocs.io/en/latest/phyloseq.html
sample_data(biom.18s.m.genus)$Culture_setup <- as.factor(sample_data(biom.18s.m.genus)$Culture_setup)
ds <- phyloseq_to_deseq2(biom.18s.m.genus, ~ Inoculum + Culture_setup)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
ds <- DESeq(ds)
## estimating size factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## fitting model and testing
alpha <- 0.001

# test differential expression: light.clean_gas - dark.clean_gas
res.1 <- results(ds, contrast=c("Culture_setup", "light+clean_gas", "dark+clean_gas"), alpha=alpha)
summary(res.1)
## 
## out of 969 with nonzero total read count
## adjusted p-value < 0.001
## LFC > 0 (up)       : 9, 0.93%
## LFC < 0 (down)     : 22, 2.3%
## outliers [1]       : 15, 1.5%
## low counts [2]     : 714, 74%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res.1 <- res.1[order(res.1$padj, na.last=NA), ]
res_sig <- res.1[(res.1$padj < alpha), ]
res_sig
## log2 fold change (MLE): Culture_setup light+clean_gas vs dark+clean_gas 
## Wald test p-value: Culture setup light.clean gas vs dark.clean gas 
## DataFrame with 31 rows and 6 columns
##                    baseMean log2FoldChange     lfcSE      stat      pvalue
##                   <numeric>      <numeric> <numeric> <numeric>   <numeric>
## DQ504338.1.1561_U 8428.8226      -11.95311  1.149804 -10.39578 2.59162e-25
## KY320339.1.1613_U    4.4960       16.33856  2.048889   7.97435 1.53184e-15
## EF024169.1.1808_U   64.7769       -7.94853  1.005054  -7.90856 2.60389e-15
## AY620366.1.1247_U 1421.6719       -9.35420  1.209885  -7.73148 1.06307e-14
## AY277797.1.1861_U   12.3586       -6.67033  0.961394  -6.93819 3.97169e-12
## ...                     ...            ...       ...       ...         ...
## KU891599.1.1820_U 1882.6365       -4.27709  1.055190  -4.05339 5.04812e-05
## EF486866.1.1676_U   39.8434       -8.47230  2.093419  -4.04711 5.18534e-05
## EU567232.1.1098_U   11.8458       -5.71658  1.458588  -3.91926 8.88229e-05
## GQ465466.1.1758_U  215.6732        3.63700  0.947935   3.83676 1.24669e-04
## MF197366.1.2097_U   27.2535       -4.81145  1.252632  -3.84108 1.22496e-04
##                          padj
##                     <numeric>
## DQ504338.1.1561_U 6.21990e-23
## KY320339.1.1613_U 1.83821e-13
## EF024169.1.1808_U 2.08311e-13
## AY620366.1.1247_U 6.37844e-13
## AY277797.1.1861_U 1.90641e-10
## ...                       ...
## KU891599.1.1820_U 0.000444458
## EF486866.1.1676_U 0.000444458
## EU567232.1.1098_U 0.000735086
## GQ465466.1.1758_U 0.000965181
## MF197366.1.2097_U 0.000965181
DESeq2::plotMA(res.1, alpha = alpha, ylim = c(-9,9))

res.1 %>% as.data.frame %>% filter(padj<alpha) %>% summarize(up = sum(log2FoldChange>1), down = sum(log2FoldChange<1))
##   up down
## 1  9   22
# up down
#1 9   22

res_sig <- cbind(as(res_sig, "data.frame"), as(tax_table(biom.18s.m)[rownames(res_sig), ], "matrix"))
pdf(paste0(wdir, "fig/deseq2-light_cleanVSdark_clean-scatter-genus-18s.pdf"), width = 15, height = 13)
ggplot(res_sig, aes(x=Genus, y=log2FoldChange, color=Subdivision)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png 
##   2
#View(cbind(as(res_sig, "data.frame"), as(otu_table(biom.18s.m)[rownames(res_sig), ], "matrix")))

#volcano plot
volDat <- as.data.frame(res.1)
volDat <- volDat[!is.na(volDat$padj), ]
volDat$DGE <- volDat$padj < alpha
pdf(paste0(wdir, "fig/deseq2-light_cleanVSdark_clean-volcano-genus-18s.pdf"), width = 15, height = 13)
ggplot(volDat, aes(x = log2FoldChange, y = -log10(padj), color = DGE)) + 
  geom_point(alpha = 0.50) + 
  scale_color_manual(values=c("TRUE" = "red", "FALSE" = "gray"))
dev.off
## function (which = dev.cur()) 
## {
##     if (which == 1) 
##         stop("cannot shut down device 1 (the null device)")
##     .External(C_devoff, as.integer(which))
##     dev.cur()
## }
## <bytecode: 0x56483d42b208>
## <environment: namespace:grDevices>


#run DESeq2
# test differential expression: light.clean_gas - light.used_gas
res.2 <- results(ds, contrast=c("Culture_setup", "light+clean_gas", "light+used_gas"), alpha=alpha)
summary(res.2)
## 
## out of 969 with nonzero total read count
## adjusted p-value < 0.001
## LFC > 0 (up)       : 0, 0%
## LFC < 0 (down)     : 0, 0%
## outliers [1]       : 15, 1.5%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res.2 <- res.2[order(res.2$padj, na.last=NA), ]
DESeq2::plotMA(res.2, alpha = alpha, ylim = c(-9,9))

res.2 %>% as.data.frame %>% filter(padj<alpha) %>% summarize(up = sum(log2FoldChange>1), down = sum(log2FoldChange<1))
##   up down
## 1  0    0
#  up down
#1  0    0

Differential abundance testing of genera with edgeR (minimap2)

As edgeR expects raw counts we will use here the minimap2 counts instead of emu counts.

er <- phyloseq2edgeR(biom.18s.m.genus)
#normalize
er <- edgeR::calcNormFactors(er, method = "TMM")
#metadata
meta.sub <- as.data.frame(sample_data(biom.18s.m.genus)[,c(2,3,8)])
meta.sub <- meta.sub %>% as_tibble() %>% mutate(across(.cols = 3, .fns = as.integer)) %>% mutate(across(.cols = 1:2, .fns = make.names))
#estimate dispersion
design <- model.matrix(~0 + Culture_setup + Inoculum, data=meta.sub)
er <- edgeR::estimateDisp(er, design)
plotBCV(er)

#differential expression
fit <- glmQLFit(er, design, robust = TRUE)
plotQLDisp(fit)

# test differential expression: light.clean_gas - dark.clean_gas
qlf <- glmQLFTest(fit, contrast = c(-1,1,0,0,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
##        -1*Culture_setupdark.clean_gas 1*Culture_setuplight.clean_gas
## Down                                                              16
## NotSig                                                           952
## Up                                                                 1
#       -1*Culture_setupdark.clean_gas 1*Culture_setuplight.clean_gas
#Down                                                              19
#NotSig                                                           948
#Up                                                                 2
dge.1 <- topTags(qlf, n = Inf, p.value = 0.001)$table
dge.1 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 1
knitr::kable(as.matrix(dge.1 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logCPM, logFC, FDR)))
Genus logCPM logFC FDR
GQ468539.1.1784_U Cylindrotheca 8.747015 4.290248 0.0007334532
dge.1 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 16
knitr::kable(as.matrix(dge.1 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logCPM, logFC, FDR)))
Genus logCPM logFC FDR
EF024169.1.1808_U Novel-Gran-6_X 9.357027 -8.377701 2.830945e-06
DQ504338.1.1561_U Aspidisca 16.386715 -11.917630 4.822431e-06
FJ870103.1.1752_U Cinetochilum 9.236447 -9.528387 1.811784e-05
AY277797.1.1861_U Paraflabellula 7.146863 -6.379301 1.811784e-05
AY620366.1.1247_U Novel-Gran-2_X 13.754814 -9.932116 1.870539e-05
HQ121441.1.1726_U Micrometopion 8.943413 -10.443996 2.157481e-05
AY749517.1.1757_U Pterocystida_XXX 7.995959 -6.593519 3.345444e-05
EF024494.1.1800_U Cercomonas 6.957978 -7.509189 3.679240e-05
EF633325.1.1748_U Ochromonadales_XX 14.868801 -9.999643 7.078945e-05
JQ782092.1.1744_U Spumella 13.158507 -9.682907 8.574012e-05
EU567232.1.1098_U Novel-Gran-1_X 7.152513 -9.238268 8.574012e-05
KM065452.1.1878_U Massisteria 6.896744 -5.069302 8.574012e-05
KU363283.1.1461_U Sessilida_X 9.259330 -6.203220 8.829018e-05
DQ388459.1.1722_U Prorocentrum 8.448156 -4.312935 2.892929e-04
AJ549210.1.1823_U Euplotes 12.152814 -4.144860 3.000959e-04
AF271998.1.1795_U Ministeria 9.258585 -8.557191 8.881408e-04
res_sig <- dge.1 %>% filter(FDR<0.001)
pdf(paste0(wdir, "fig/edger-light_cleanVSdark_clean-scatter-genus-18s.pdf"), width = 15, height = 13)
ggplot(res_sig, aes(x=Genus, y=logFC, color=Subdivision)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png 
##   2
# test differential expression: (light.clean_gas + light.used_gas) - dark.clean_gas
qlf <- glmQLFTest(fit, contrast = c(-1,1/2,1/2,0,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
##        -1*Culture_setupdark.clean_gas 0.5*Culture_setuplight.clean_gas 0.5*Culture_setuplight.used_gas
## Down                                                                                                22
## NotSig                                                                                             946
## Up                                                                                                   1
#       -1*Culture_setupdark.clean_gas 0.5*Culture_setuplight.clean_gas 0.5*Culture_setuplight.used_gas
#Down                                                                                                24
#NotSig                                                                                             943
#Up                                                                                                   2
dge.2 <- topTags(qlf, n = Inf, p.value = 0.001)$table
dge.2 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 1
knitr::kable(as.matrix(dge.2 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
GQ468539.1.1784_U Cylindrotheca 4.121972 0.0006522498
dge.2 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 22
knitr::kable(as.matrix(dge.2 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
EF024169.1.1808_U Novel-Gran-6_X -8.459597 2.784921e-07
DQ504338.1.1561_U Aspidisca -10.582575 2.784921e-07
AY749517.1.1757_U Pterocystida_XXX -8.163704 2.784921e-07
AY620366.1.1247_U Novel-Gran-2_X -9.757920 2.784921e-07
EF633325.1.1748_U Ochromonadales_XX -10.972292 3.478802e-07
JQ782092.1.1744_U Spumella -9.926400 3.648485e-07
FJ870103.1.1752_U Cinetochilum -9.244398 5.762508e-07
AY277797.1.1861_U Paraflabellula -5.911446 9.666456e-07
KU363283.1.1461_U Sessilida_X -7.207558 2.516731e-06
EF024494.1.1800_U Cercomonas -7.084886 2.696626e-06
HQ121441.1.1726_U Micrometopion -8.561980 5.495526e-06
KM065452.1.1878_U Massisteria -4.682554 2.083382e-05
EU567232.1.1098_U Novel-Gran-1_X -7.332314 1.052815e-04
AF271998.1.1795_U Ministeria -7.994616 1.052815e-04
KU891599.1.1820_U Paraphysomonas -4.974620 1.052815e-04
DQ388459.1.1722_U Prorocentrum -3.873278 1.052815e-04
MN945085.1.1704_U Pedospumella -11.108575 1.395728e-04
MN317566.1.2078_U Cunea -6.252953 1.575477e-04
JQ967291.1.1656_U Lepidochromonas -7.108951 2.894479e-04
AJ549210.1.1823_U Euplotes -3.564333 3.357458e-04
FJ832156.1.1753_U Lecudina -4.570884 7.868416e-04
MF197366.1.2097_U Paramoeba -4.968286 9.093298e-04
pdf(paste0(wdir, "fig/edger-light_clean_usedVSdark_clean-scatter-genus-18s.pdf"), width = 15, height = 13)
ggplot(dge.2, aes(x=Genus, y=logFC, color=Subdivision)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png 
##   2
# test differential expression: light.clean_gas - light.used_gas
qlf <- glmQLFTest(fit, contrast = c(0,1,-1,0,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.01)) #FDR<0.01
##        1*Culture_setuplight.clean_gas -1*Culture_setuplight.used_gas
## Down                                                               0
## NotSig                                                           969
## Up                                                                 0
#       1*Culture_setuplight.clean_gas -1*Culture_setuplight.used_gas
#Down                                                               0
#NotSig                                                           969
#Up                                                                 0
# test differential expression: ocean - (light.clean_gas + light.used_gas + dark.clean_gas)/3
qlf <- glmQLFTest(fit, contrast = c(-1/3,-1/3,-1/3,1,0,0))
# create DGE table
summary(decideTests(qlf,p.value=0.001)) #FDR<0.001
##        -0.333333333333333*Culture_setupdark.clean_gas -0.333333333333333*Culture_setuplight.clean_gas -0.333333333333333*Culture_setuplight.used_gas 1*Culture_setupocean
## Down                                                                                                                                                                   19
## NotSig                                                                                                                                                                851
## Up                                                                                                                                                                     99
#       -0.333333333333333*Culture_setupdark.clean_gas -0.333333333333333*Culture_setuplight.clean_gas -0.333333333333333*Culture_setuplight.used_gas 1*Culture_setupocean
#Down                                                               28
#NotSig                                                            802
#Up                                                                139
dge.3 <- topTags(qlf, n = Inf, p.value = 0.001)$table
dge.3 %>% filter(FDR<0.001 & logFC > 1) %>% nrow()
## [1] 99
knitr::kable(as.matrix(dge.3 %>% filter(FDR<0.001 & logFC > 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
FN669510.1.1800_U Gyrodinium 3.212317 1.006479e-07
FN263034.1.1797_U Novel-clade-2_X 2.953095 1.006479e-07
FR874775.1.1777_U Micromonas 2.864596 1.792524e-07
KJ763236.1.1766_U Ostreococcus 3.485785 2.384636e-07
AB252769.1.1676_U MAST-12A 2.854537 3.109848e-07
FR874799.1.1794_U Dino-Group-II-Clade-32_X 3.336879 4.054296e-07
FJ480419.1.1774_U Strombidium 2.955235 4.054296e-07
AY381206.1.1722_U Peronosporomycetes_XXX 2.929331 4.054296e-07
FN690411.1.1710_U Protaspa-lineage_X 2.809405 5.124073e-07
FJ884690.1.1768_U Teleaulax 2.838814 1.001635e-06
LC068842.1.1804_U Biecheleria 2.681285 1.001635e-06
KX783613.1.1556_U Mesodinium 2.956847 1.258742e-06
JX828170.1.1778_U Leptosiphonia 2.930587 1.301870e-06
AY180040.1.1628_U Chlamydodon_1 2.929003 1.453780e-06
EF527106.1.1777_U Strombidiidae_X 2.744290 1.453780e-06
DQ314811.1.1777_U Cryothecomonas 2.733860 1.453780e-06
AY665065.1.1754_U Dinophyceae_XXX 2.671066 1.521996e-06
JQ782085.1.1735_U MAST-1C 3.151734 2.017583e-06
KJ763648.1.1765_U Cryptomonadales_XX 2.705738 2.033536e-06
AB363063.1.1731_U Olpidiopsis 3.351445 2.358088e-06
JX178886.1.1761_U Tintinnopsis_07 2.752226 2.879685e-06
FN263276.1.1796_U Chrysochromulina 3.113689 2.998362e-06
AF290540.1.1819_U Protaspa 2.872285 2.998362e-06
AF372763.1.1585_U Haliphthorales_X 3.369392 3.339443e-06
EF100270.1.1404_U Oblongichytrium 2.952638 3.374036e-06
KC753491.1.1723_U Dysteria 3.050703 5.062278e-06
EF527109.1.1755_U Ceratium 3.020519 5.271572e-06
JN090866.1.1795_U Kathablepharidida_XX 3.040629 5.271572e-06
AJ535168.1.1802_U Skeletonema 2.969831 5.271572e-06
AY129058.1.1785_U Dino-Group-I-Clade-4_X 2.482761 5.718221e-06
KU715759.1.1764_U Eutintinnus 2.976138 6.266128e-06
AB178865.1.1782_U Haliphthoros 3.337262 1.028597e-05
U53128.1.1747_U Rhodomonas 2.900311 1.028597e-05
AF427534.1.1778_U Polysiphonia 3.806366 1.143176e-05
AJ506972.1.1802_U Dinophysis 3.163266 1.253527e-05
HM369786.1.1085_U Dino-Group-II_XX 3.173840 1.371587e-05
AHAL01000301.1474.3265_U Emiliania 3.182640 1.386124e-05
AF274267.1.1752_U Heterocapsa 2.978867 1.471228e-05
AY641564.1.1799_U Alexandrium 2.623470 1.756289e-05
EF492490.1.1787_U Pelagodinium 2.972916 1.846733e-05
KJ759274.1.1796_U Dino-Group-II-Clade-2_X 3.268615 1.953564e-05
AB558956.1.1768_U Mataza 3.183349 2.290759e-05
KC488352.1.1657_U Pyramimonas 2.835315 2.350127e-05
FR874444.1.1814_U MAST-12B 3.002991 2.613072e-05
AF022199.1.1803_U Lepidodinium 2.739249 3.989044e-05
DQ310262.1.1409_U MAST-6_X 2.590359 4.090375e-05
KC488480.1.1694_U Dino-Group-II-Clade-1_X 3.013341 4.516952e-05
AB284572.1.1791_U Lagenidium 3.844777 5.852950e-05
U33137.1.1775_U Schottera 3.167806 7.155069e-05
EU418969.1.1590_U Pentapharsodinium 3.386642 7.192882e-05
KJ763136.1.1804_U Picozoa_XXXXX 2.724075 7.458363e-05
MK629451.1.1737_U Syltodinium 3.212401 7.458363e-05
GQ375265.1.1724_U Baffinella 3.497448 8.729904e-05
HG792066.1.1727_U Ansanella 2.195326 8.729904e-05
AY179974.1.1730_U Gregarinomorphea_X_GRE1_XX 2.932607 1.308539e-04
FR874328.1.1775_U Bathycoccus 2.803300 1.335248e-04
LC322144.1.1754_U Quadricilia 3.981005 1.576621e-04
AB902542.1.1694_U PHYLL_2_X 3.362731 1.639908e-04
FJ832156.1.1753_U Lecudina 3.898038 1.646623e-04
KJ760791.1.1789_U Dino-Group-I-Clade-1_X 3.505662 1.934064e-04
KJ925256.1.1318_U Suctoria_XX 3.772774 2.156799e-04
JX188322.1.1754_U MAST-9D 2.846355 2.156799e-04
JN848814.1.1651_U MAST-3J 2.786115 2.156799e-04
AF022192.1.1803_U Tripos 2.954461 2.668074e-04
KC315810.1.1645_U Vampyrellida_XX 3.425579 4.223104e-04
LC222307.1.1739_U Spiniferites 3.085483 4.223104e-04
AY620311.1.1258_U Filosa-Thecofilosea_XXX 3.422152 4.384032e-04
AF236793.1.1776_U Ceramium 2.929391 4.432661e-04
KJ925383.1.1505_U Thoracosphaeraceae_X 3.792743 4.628649e-04
FJ947040.1.1779_U Warnowia 3.083876 4.910898e-04
AB698451.1.1802_U Gymnoxanthella 3.398731 4.910898e-04
AM408889.1.1796_U Paragymnodinium 3.439374 4.910898e-04
MH319376.1.1669_U Frontonia 3.087373 4.910898e-04
EF492493.1.1788_U Biecheleriopsis 2.936012 4.910898e-04
KF130175.1.1772_U Haptophyta_Clade_HAP3_XXX 3.860196 4.910898e-04
AB622340.1.1685_U op14-lineage_X 4.236172 4.991459e-04
JQ781913.1.1798_U MAST-7A 3.878236 5.059793e-04
FJ868202.1.1749_U Frontonia_1 2.849783 5.112764e-04
AB695524.1.1779_U Paulinella 3.444001 5.331051e-04
AF427531.1.1778_U Neosiphonia 3.265927 5.439223e-04
KJ925201.1.1629_U Askenasia 3.093307 5.917195e-04
JF794039.1.1605_U Navicula 2.715644 6.007697e-04
JX967270.1.1555_U Polykrikos 3.340932 6.653621e-04
KX906568.1.1634_U Synophrya 3.693041 6.772565e-04
AY598674.1.1791_U Pythium 2.927542 6.772565e-04
JQ782065.1.1805_U MAST-4D 3.647399 6.772565e-04
HM749950.1.1540_U Parmales_env_3A 3.568999 6.772565e-04
EF100331.1.1380_U Polyplicarium 3.351292 6.772565e-04
EU718682.1.1770_U Callithamnion 2.811049 6.906068e-04
FN263279.1.1764_U Plagioselmis 3.725092 7.171082e-04
KC779515.1.1801_U Thalassomyxa 2.860095 7.650172e-04
JQ904059.1.1656_U Chlamydodon 2.156034 7.916445e-04
AB252760.1.1723_U NC12B-lineage_X 3.886625 7.933883e-04
KC771153.1.1815_U CCW10-lineage_X 2.643843 8.494295e-04
FJ865206.1.1693_U Acineta 4.120701 8.513066e-04
LC189150.1.1713_U Gephyrocapsa 3.072182 8.723554e-04
AJ561113.1.1822_U Pirsonia 4.358368 8.841337e-04
EF527182.1.1579_U TAGIRI1-lineage_X 2.157436 8.841337e-04
EU371397.1.1818_U Ventricleftida_XX 3.136556 9.035933e-04
dge.3 %>% filter(FDR<0.001 & logFC < 1) %>% nrow()
## [1] 19
knitr::kable(as.matrix(dge.3 %>% filter(FDR<0.001 & logFC < 1) %>% dplyr::select(Genus, logFC, FDR)))
Genus logFC FDR
AY543062.1.1792_U Mychonastes -4.752675 4.054296e-07
KC875350.1.1077_U Dunaliella -6.939005 1.301870e-06
FJ946883.1.1793_U Chlorella -5.908957 1.453780e-06
DQ079859.1.1794_U Tisochrysis -6.280275 2.017583e-06
KY054986.1.1708_U Nannochloropsis -5.667449 5.271572e-06
MT901380.1.1779_U Parietochloris -5.665076 6.930783e-06
X68484.4.1753_U Scherffelia -5.042462 7.049004e-06
AB080302.1.1788_U Nannochloris -5.024995 1.352865e-05
KY054995.1.1744_U Tetraselmis -4.625811 3.035605e-05
AB058309.1.1749_U Picochlorum -4.722150 3.258877e-05
KM020184.1.1749_U Stichococcus -4.670212 6.100902e-05
MT425950.1.1793_U Muriella -4.677366 7.155069e-05
AY121853.1.1913_U Vannella -5.909288 1.308539e-04
FJ810216.1.1797_U Aplanochytrium -4.402574 1.387752e-04
EF527133.16.1840_U Chlorodendrales_XX -3.637523 2.330843e-04
GQ465466.1.1758_U Uronema -4.132117 2.538969e-04
JF790981.1.1777_U Chlorellales_XX -3.131270 2.668074e-04
JQ315589.1.1670_U Scenedesmus -5.523654 2.937352e-04
AF278744.1.2559_R Koliella -3.619979 4.262408e-04
res_sig <- dge.3 %>% filter(FDR<0.001)
pdf(paste0(wdir, "fig/edger-oceanVSinoculum-scatter-genus-18s.pdf"), width = 15, height = 13)
ggplot(res_sig, aes(x=Genus, y=logFC, color=Subdivision)) +
    geom_jitter(size=3, width = 0.2) +
    theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
dev.off()
## png 
##   2


Linear time course trends under light of genera with edgeR (minimap2)

# time course trend analysis
library(splines)
meta.light <- meta.sub %>% filter(Culture_setup == "light.clean_gas" | Culture_setup == "light.used_gas" | Culture_setup == "ocean") %>% filter(Inoculum != "Hornbaek.shallow")
X <- ns(meta.light$Time_point, df=1)
design.light <- model.matrix(~X + Inoculum, data=meta.light)
biom.18s.m.light <- prune_samples(sample_names(biom.18s.m.genus) %in% c("flaregas-4","flaregas-5","flaregas-6","flaregas-7","flaregas-8","flaregas-9","flaregas-10","flaregas-11","flaregas-12","flaregas-13","flaregas-14","flaregas-15","flaregas-16","flaregas-17","flaregas-18","flaregas-19","flaregas-20"), biom.18s.m.genus)
er <- phyloseq2edgeR(biom.18s.m.light)
er <- edgeR::calcNormFactors(er, method = "TMM")
er <- edgeR::estimateDisp(er, design.light)
plotBCV(er)

sqrt(er$common.dispersion)
## [1] 0.9885399
fit <- glmQLFit(er, design.light, robust = TRUE)
plotQLDisp(fit)

qlf <- glmQLFTest(fit, coef=2)
summary(decideTests(qlf, p.value=0.01))
##          X
## Down   160
## NotSig 779
## Up      30
#          X
#Down    138
#NotSig  798
#Up       33

#filter genera that where detected with Emu (filter low abundant genera)
dge.4 <- topTags(qlf, n = Inf, p.value = 1)$table
dge.4 <- dge.4[biom.18s.genus.minimap2emu$SpeciesID,]

# visualize the fitted spline curves for the top four enriched species under cultivation conditions
dge.4.u <- dge.4 %>% filter(FDR<0.01 & logFC > 1)
dge.4.u %>% nrow()
## [1] 11
logCPM.obs <- cpm(er, log=TRUE, prior.count=qlf$prior.count)
logCPM.fit <- cpm(qlf, log=TRUE)
pdf(paste0(wdir, "fig/edger-time-course-light-genus-18s-1.pdf"), width = 15, height = 15)
par(mfrow=c(2,6))
for(i in 1:nrow(dge.4.u)) {
 FlybaseID <- row.names(dge.4.u)[i]
 Symbol <- dge.4.u$Genus[i]
 logCPM.obs.i <- logCPM.obs[FlybaseID,]
 logCPM.fit.i <- logCPM.fit[FlybaseID,]
 plot(meta.light$Time_point, logCPM.obs.i, ylab="log-CPM", xlab="Time point", main=Symbol, pch=16)
 lines(meta.light$Time_point, logCPM.fit.i, col="red", lwd=2)
}
dev.off()
## png 
##   2

Linear time course trends in the dark of genera with edgeR (minimap2)

# time course trend analysis
library(splines)
meta.dark <- meta.sub %>% filter(Culture_setup == "dark.clean_gas" | Culture_setup == "ocean") %>% filter(Inoculum != "Helsingor")
X <- ns(meta.dark$Time_point, df=1)
design.dark <- model.matrix(~X + Inoculum, data=meta.dark)
biom.18s.m.dark <- prune_samples(sample_names(biom.18s.m.genus) %in% c("flaregas-11","flaregas-12","flaregas-21","flaregas-22","flaregas-23","flaregas-24","flaregas-25","flaregas-26","flaregas-27","flaregas-28","flaregas-29","flaregas-30"), biom.18s.m.genus)
er <- phyloseq2edgeR(biom.18s.m.dark)
er <- edgeR::calcNormFactors(er, method = "TMM")
er <- edgeR::estimateDisp(er, design.dark)
plotBCV(er)

sqrt(er$common.dispersion)
## [1] 0.9729365
fit <- glmQLFit(er, design.dark, robust = TRUE)
plotQLDisp(fit)

qlf <- glmQLFTest(fit, coef=2)
summary(decideTests(qlf, p.value=0.01))
##          X
## Down    21
## NotSig 922
## Up      26
#         X
#Down     4
#NotSig 942
#Up      23

#filter genera that where detected with Emu (filter low abundant genera)
dge.5 <- topTags(qlf, n = Inf, p.value = 1)$table
dge.5 <- dge.5[biom.18s.genus.minimap2emu$SpeciesID,]


# visualize the fitted spline curves for the top four enriched species under cultivation conditions
dge.5.u <- dge.5 %>% filter(FDR<0.01 & logFC > 1)
dge.5.u %>% nrow()
## [1] 25
logCPM.obs <- cpm(er, log=TRUE, prior.count=qlf$prior.count)
logCPM.fit <- cpm(qlf, log=TRUE)
pdf(paste0(wdir, "fig/edger-time-course-dark-genus-18s-up-1.pdf"), width = 15, height = 15)
par(mfrow=c(3,5))
for(i in 1:15) {
 FlybaseID <- row.names(dge.5.u)[i]
 Symbol <- dge.5.u$Genus[i]
 logCPM.obs.i <- logCPM.obs[FlybaseID,]
 logCPM.fit.i <- logCPM.fit[FlybaseID,]
 plot(meta.dark$Time_point, logCPM.obs.i, ylab="log-CPM", xlab="Time point", main=Symbol, pch=16)
 lines(meta.dark$Time_point, logCPM.fit.i, col="red", lwd=2)
}
dev.off()
## png 
##   2
pdf(paste0(wdir, "fig/edger-time-course-dark-genus-18s-up-2.pdf"), width = 15, height = 15)
par(mfrow=c(2,5))
for(i in 16:nrow(dge.5.u)) {
 FlybaseID <- row.names(dge.5.u)[i]
 Symbol <- dge.5.u$Genus[i]
 logCPM.obs.i <- logCPM.obs[FlybaseID,]
 logCPM.fit.i <- logCPM.fit[FlybaseID,]
 plot(meta.dark$Time_point, logCPM.obs.i, ylab="log-CPM", xlab="Time point", main=Symbol, pch=16)
 lines(meta.dark$Time_point, logCPM.fit.i, col="red", lwd=2)
}
dev.off()
## png 
##   2


Compare results of DESeq2 and edgeR on genus level

alpha <- 0.001
# test differential expression: light.clean_gas - dark.clean_gas
gene_list <- list(
  DESeq2 = res.1 %>% as.data.frame %>% dplyr::filter(padj < alpha & log2FoldChange<1) %>% rownames,
  edgeR = dge.1 %>% dplyr::filter(FDR < alpha & logFC<1) %>% rownames
)
#venn.diagram(x = gene_list, filename = "venn.light-cleanVSdark-clean.down.tiff")
p.a <- venn(gene_list)

gene_list <- list(
  DESeq2 = res.1 %>% as.data.frame %>% dplyr::filter(padj < alpha & log2FoldChange>1) %>% rownames,
  edgeR = dge.1 %>% dplyr::filter(FDR < alpha & logFC>1) %>% rownames
)
p.b <- venn(gene_list)

Compare light_clean_usedVSdark results of ALDEX and edgeR on genus level

alpha <- 0.001
# test differential expression: (light.clean_gas + light.used_gas) - dark.clean_gas
gene_list <- list(
  aldex = glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1,] %>% row.names(), 
  edgeR = dge.2 %>% filter(FDR < alpha & logFC > 1) %>% rownames
)
p.a <- venn(gene_list)

knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[attr(p.a,"intersections")$aldex,]))
gene_list <- list(
  aldex = glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1,] %>% row.names(), 
  edgeR = dge.2 %>% filter(FDR < alpha & logFC < 1) %>% rownames
)
p.a <- venn(gene_list)

knitr::kable(as.matrix(tax_table(biom.18s.m.genus)[attr(p.a,"intersections")$aldex,]))

Compare results of ocean VS inoculum and time course tends in edgeR on genus level

alpha <- 0.001
# test differential expression: ocean - (light.clean_gas + light.used_gas + dark.clean_gas)/3
#light
gene_list <- list(
  edgeR.contrast = dge.3 %>% filter(FDR < alpha & logFC > 1) %>% rownames,
  edgeR.splines = dge.4 %>% filter(FDR < alpha & logFC < 1) %>% rownames
)
p.a <- venn(gene_list)

gene_list <- list(
  edgeR.contrast = dge.3 %>% filter(FDR < alpha & logFC < 1) %>% rownames,
  edgeR.splines = dge.4 %>% filter(FDR < alpha & logFC > 1) %>% rownames
)
p.b <- venn(gene_list)

#dark
gene_list <- list(
  edgeR.contrast = dge.3 %>% filter(FDR < alpha & logFC > 1) %>% rownames,
  edgeR.splines = dge.5 %>% filter(FDR < alpha & logFC < 1) %>% rownames
)
p.c <- venn(gene_list)

gene_list <- list(
  edgeR.contrast = dge.3 %>% filter(FDR < alpha & logFC < 1) %>% rownames,
  edgeR.splines = dge.5 %>% filter(FDR < alpha & logFC > 1) %>% rownames
)
p.d <- venn(gene_list)

Network construction and characterization

Semi-Parametric Rank-based approach for INference in Graphical model (SPRING)

#https://github.com/stefpeschel/NetCoMi
library(NetCoMi)
## Loading required package: SpiecEasi
## 
## Attaching package: 'SpiecEasi'
## The following object is masked from 'package:MASS':
## 
##     fitdistr
## 
net_spring <- netConstruct(biom.18s,
                           filtTax = "highestFreq",
                           filtTaxPar = list(highestFreq = 50),
                           measure = "spring",
                           measurePar = list(nlambda=50, 
                                             rep.num=50,
                                             Rmethod = "approx"),
                           normMethod = "none", 
                           zeroMethod = "none",
                           sparsMethod = "none", 
                           dissFunc = "signed",
                           verbose = 3,
                           seed = 123456)
## Checking input arguments ...
## Done.
## Data filtering ...
## 967 taxa removed.
## 50 taxa and 27 samples remaining.
## 
## Calculate 'spring' associations ... 
## Registered S3 method overwritten by 'dendextend':
##   method     from 
##   rev.hclust vegan
## Registered S3 method overwritten by 'seriation':
##   method         from 
##   reorder.hclust vegan
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_spring$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

net_spring_unsigned <- netConstruct(data = net_spring$assoEst1,
                           dataType = "correlation",
                           sparsMethod = "threshold",
                           dissFunc = "unsigned",
                           verbose = 3)
## Checking input arguments ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_spring_unsigned$adjaMat1, 100, ylim = c(0, 400),
     xlab = "Adjacency values", 
     main = "Adjacencies (with \"unsigned\" transformation)")

temp.nw <- net_spring
props_spring <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           hubPar = "eigenvector",
                           weightDeg = FALSE, normDeg = FALSE)

#?summary.microNetProps
print(summary(props_spring, numbNodes = 5L))
## 
## Component sizes
## ```````````````        
## size: 50
##    #:  1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##                                  
## Number of components      1.00000
## Clustering coefficient    0.19151
## Modularity                0.55602
## Positive edge percentage 81.25000
## Edge density              0.06531
## Natural connectivity      0.02568
## Vertex connectivity       1.00000
## Edge connectivity         1.00000
## Average dissimilarity*    0.97854
## Average path length**     2.70507
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##                    
## name: 1 2 3 4 5 6 7
##    #: 7 7 7 9 6 5 9
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````       
##  141540
##  153179
##  24038 
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##         
## 153179 8
## 141540 6
## 163227 5
## 135033 5
## 163252 5
## 
## Betweenness centrality (normalized):
##               
## 162601 0.16497
## 163227 0.16327
## 153179 0.15051
## 140161 0.15051
## 40675  0.13520
## 
## Closeness centrality (normalized):
##               
## 153179 0.64102
## 141540 0.62472
## 40675  0.60451
## 140161 0.58343
## 163227 0.57828
## 
## Eigenvector centrality (normalized):
##               
## 153179 1.00000
## 141540 0.91042
## 24038  0.59247
## 6548   0.56813
## 40675  0.52545
pdf(paste0(wdir, "fig/network-spring-18s.pdf"), width = 30, height = 15)
p <- plot(props_spring,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.01,
          nodeColor = "cluster",
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 1.6,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          title1 = "Network on species level with SPRING associations",
          showTitle = TRUE,
          cexTitle = 2.3)
legend(0.57, 1.1, cex = 2.2, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#009900","red"), 
       bty = "n", horiz = FALSE)
dev.off()
## png 
##   2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
##      75% 
## 0.342628
biom.18s.genus.top50 <- prune_taxa(names(sort(taxa_sums(biom.18s.genus), decreasing = TRUE)[1:50]), biom.18s.genus)
tax_table(biom.18s.genus.top50) <- tax_table(biom.18s.genus.top50) %>% as.data.frame() %>% mutate(Genus = str_remove(Genus, "_X*$")) %>% as.matrix()
net_spring_genus <- netConstruct(biom.18s.genus.top50,
                           #filtTax = "highestFreq",
                           #filtTaxPar = list(highestFreq = 50),
                           taxRank = "Genus",
                           measure = "spring",
                           measurePar = list(nlambda=50, 
                                             rep.num=50,
                                             Rmethod = "approx"),
                           normMethod = "none", 
                           zeroMethod = "none",
                           sparsMethod = "none", 
                           dissFunc = "signed",
                           verbose = 3,
                           seed = 123456)
## Checking input arguments ... Done.
## 50 taxa and 27 samples remaining.
## 
## Calculate 'spring' associations ...
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## The input is identified as the covariance matrix.
## Conducting Meinshausen & Buhlmann graph estimation (mb)....done
## Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_spring_genus$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

net_spring_genus_unsigned <- netConstruct(data = net_spring_genus$assoEst1,
                           dataType = "correlation",
                           sparsMethod = "threshold",
                           thresh = 0.3,
                           dissFunc = "unsigned",
                           verbose = 3)
## Checking input arguments ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_spring_genus_unsigned$adjaMat1, 100, ylim = c(0, 400),
     xlab = "Adjacency values", 
     main = "Adjacencies (with \"unsigned\" transformation)")

temp.nw <- net_spring_genus
props_spring_genus <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           #hubPar = "eigenvector",
                           hubPar = "betweenness",
                           hubQuant = 0.9,
                           weightDeg = FALSE, normDeg = FALSE)

#?summary.microNetProps
print(summary(props_spring_genus, numbNodes = 5L))
## 
## Component sizes
## ```````````````        
## size: 50
##    #:  1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##                                  
## Number of components      1.00000
## Clustering coefficient    0.21232
## Modularity                0.56673
## Positive edge percentage 83.72093
## Edge density              0.07020
## Natural connectivity      0.02633
## Vertex connectivity       1.00000
## Edge connectivity         1.00000
## Average dissimilarity*    0.97610
## Average path length**     2.63212
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##                   
## name: 1 2  3 4 5 6
##    #: 9 9 12 6 9 5
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````                  
##  Caecitellus      
##  Heterocapsa      
##  Protaspa-lineage 
##  Rimostrombidium  
##  Telonemia-Group-1
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##                    
## Rimostrombidium   8
## Philasterida      6
## Telonemia-Group-1 6
## Lepidodinium      5
## Aspidisca         5
## 
## Betweenness centrality (normalized):
##                          
## Rimostrombidium   0.27296
## Protaspa-lineage  0.24490
## Telonemia-Group-1 0.20238
## Caecitellus       0.17687
## Heterocapsa       0.16922
## 
## Closeness centrality (normalized):
##                          
## Rimostrombidium   0.65456
## Caecitellus       0.62236
## Telonemia-Group-1 0.58551
## Caecitellaceae    0.57356
## Protaspa-lineage  0.56798
## 
## Eigenvector centrality (normalized):
##                        
## Rimostrombidium 1.00000
## Novel-Gran-2    0.87898
## Spumella        0.84978
## Philasterida    0.81381
## Aspidisca       0.80364
pdf(paste0(wdir, "fig/network-spring-genus-18s.pdf"), width = 30, height = 15)
p <- plot(props_spring_genus,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.01,
          nodeColor = "cluster",
          colorVec =  c("#E69F00", "#56B4E9", "#4daf4a", "#009960", "#CC79A7", "#804000", "#999999"),
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 2.4,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          #title1 = "Network on species level with SPRING associations",
          showTitle = FALSE,
          #cexTitle = 2.3,
          posCol = "#0072B2",
          negCol = '#e41a1c')
legend(0.75, 1.1, cex = 2.4, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#0072B2", '#e41a1c'), 
       bty = "n", horiz = FALSE)
dev.off()
## png 
##   2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
##       75% 
## 0.3820446
knitr::kable(as.matrix(sort(colSums(net_spring_genus$normCounts1), decreasing = TRUE)[1:20]))
Picochlorum 693010.57
Tetraselmis 225314.66
Nannochloris 202237.24
Chlamydomonas 194650.26
Aspidisca 115217.73
Rimostrombidium 111952.60
Chlamydomonadales 67824.76
Bicoecea 59376.22
Spumella 35457.50
Paraphysomonas 34067.75
Chlorodendrales 33343.15
Chlamydodon 31290.70
Heterocapsa 31027.28
Philasterida 29921.38
Lepidodinium 24603.54
Caecitellus 22679.89
Aplanochytrium 21554.37
Novel-Gran-2 19322.87
Dino-Group-II-Clade-2 13027.69
Alexandrium 11021.24

SPRING for time point 11 hours

#https://github.com/stefpeschel/NetCoMi
library(NetCoMi)
biom.18s.genus.top50.11hr <- prune_samples(sample_data(biom.18s.genus.top50) %>% as.data.frame() %>% as.matrix() %>% as.data.frame() %>% filter(Time_point == 11) %>% rownames(), biom.18s.genus.top50)

net_spring_genus.11 <- netConstruct(biom.18s.genus.top50.11hr,
                           #filtTax = "highestFreq",
                           #filtTaxPar = list(highestFreq = 50),
                           taxRank = "Genus",
                           measure = "spring",
                           measurePar = list(nlambda=50, 
                                             rep.num=50,
                                             Rmethod = "approx"),
                           normMethod = "none", 
                           zeroMethod = "none",
                           sparsMethod = "none", 
                           dissFunc = "signed",
                           verbose = 3,
                           seed = 123456)
## Checking input arguments ... Done.
## 50 taxa and 8 samples remaining.
## 
## Calculate 'spring' associations ... 
## Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_spring_genus.11$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

temp.nw <- net_spring_genus.11
props_spring_genus.11 <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           hubPar = "eigenvector",
                           weightDeg = FALSE, normDeg = FALSE)

#?summary.microNetProps
print(summary(props_spring_genus.11, numbNodes = 5L))
## 
## Component sizes
## ```````````````                 
## size: 23 6 3 2  1
##    #:  1 1 3 1 10
## ______________________________
## Global network properties
## `````````````````````````
## Largest connected component (LCC):
##                                  
## Relative LCC size         0.46000
## Clustering coefficient    0.00000
## Modularity                0.60744
## Positive edge percentage 77.27273
## Edge density              0.08696
## Natural connectivity      0.05491
## Vertex connectivity       1.00000
## Edge connectivity         1.00000
## Average dissimilarity*    0.97303
## Average path length**     3.39738
## 
## Whole network:
##                                  
## Number of components     16.00000
## Clustering coefficient    0.22232
## Modularity                0.77006
## Positive edge percentage 86.11111
## Edge density              0.02939
## Natural connectivity      0.02327
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##                              
## name:  0 1 2 3 4 5 6 7 8 9 10
##    #: 10 6 5 5 5 4 4 3 3 3  2
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````                
##  Chlorodendrales
##  Nannochloris   
##  Spumella       
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Centrality of disconnected components is zero
## ````````````````````````````````````````````````
## Degree (unnormalized):
##                  
## Aspidisca       3
## Strombidium     3
## Flabellula      3
## Chlorodendrales 3
## Nannochloris    3
## 
## Betweenness centrality (normalized):
##                        
## Spumella        0.68831
## Chlorodendrales 0.53680
## Aspidisca       0.43723
## Nannochloris    0.38528
## Paraphysomonas  0.36797
## 
## Closeness centrality (normalized):
##                        
## Spumella        0.58881
## Chlorodendrales 0.57152
## Aspidisca       0.54571
## Nannochloris    0.53206
## Paraphysomonas  0.51210
## 
## Eigenvector centrality (normalized):
##                        
## Chlorodendrales 1.00000
## Spumella        0.99112
## Nannochloris    0.87986
## Aspidisca       0.80774
## Paraphysomonas  0.63081
pdf(paste0(wdir, "fig/network-spring-genus-11hr-18s.pdf"), width = 30, height = 15)
p <- plot(props_spring_genus.11,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.01,
          nodeColor = "cluster",
          colorVec =  c("#804000", "#56B4E9", "#009960", "#999999", "#357090", "#CC79A7", "#E69F00"),
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 1.6,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          #title1 = "Network on genus level with Pearson correlations",
          showTitle = FALSE,
          #cexTitle = 2.3,
          posCol = "#0072B2",
          negCol = '#e41a1c')
legend(0.75, 1.1, cex = 2.2, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#0072B2", '#e41a1c'), 
       bty = "n", horiz = FALSE)
dev.off()
## png 
##   2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
##       75% 
## 0.3415613
knitr::kable(as.matrix(sort(colSums(net_spring_genus.11$normCounts1), decreasing = TRUE)[1:20]))
Picochlorum 263986.958
Nannochloris 137097.613
Chlamydomonas 126936.645
Tetraselmis 69974.078
Chlamydomonadales 57486.777
Aspidisca 50485.400
Paraphysomonas 22702.979
Chlorodendrales 17430.882
Novel-Gran-2 13658.268
Rimostrombidium 4439.017
Aplanochytrium 3115.442
Perkinsida 2678.884
Bicoecea 1996.169
Vannella 1939.600
Philasterida 1936.981
Chlorococcum 1914.004
Euplotes 1399.536
Flabellula 1373.835
Heterocapsa 1225.493
Chlamydodon 1103.903

Pearson’s correlation

#While with the “signed” transformation, positive correlated taxa are likely to belong to the same cluster, with the “unsigned” transformation clusters contain strongly positive and negative correlated taxa.
net_pears <- netConstruct(biom.18s,  
                          filtTax = "highestFreq",
                          filtTaxPar = list(highestFreq = 50),
                          measure = "pearson",
                          normMethod = "clr",
                          zeroMethod = "multRepl",
                          sparsMethod = "threshold",
                          thresh = 0.3,
                          dissFunc = "signed",
                          verbose = 3)
## Checking input arguments ... Done.
## Data filtering ...
## 967 taxa removed.
## 50 taxa and 27 samples remaining.
## 
## Zero treatment:
## Execute multRepl() ... Done.
## 
## Normalization:
## Execute clr(){SpiecEasi} ... Done.
## 
## Calculate 'pearson' associations ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_pears$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

net_pears_unsigned <- netConstruct(data = net_pears$assoEst1,
                           dataType = "correlation",
                           sparsMethod = "threshold",
                           thresh = 0.3,
                           dissFunc = "unsigned",
                           verbose = 3)
## Checking input arguments ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_pears_unsigned$adjaMat1, 100, ylim = c(0, 400),
     xlab = "Adjacency values", 
     main = "Adjacencies (with \"unsigned\" transformation)")

temp.nw <- net_pears
props_pears <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           hubPar = "eigenvector",
                           weightDeg = FALSE, normDeg = FALSE
                          )

#?summary.microNetProps
print(summary(props_pears, numbNodes = 5L))
## 
## Component sizes
## ```````````````        
## size: 50
##    #:  1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##                                  
## Number of components      1.00000
## Clustering coefficient    0.71660
## Modularity                0.03747
## Positive edge percentage 42.19653
## Edge density              0.56490
## Natural connectivity      0.16225
## Vertex connectivity       5.00000
## Edge connectivity         5.00000
## Average dissimilarity*    0.81463
## Average path length**     1.03688
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##               
## name:  1  2  3
##    #: 13 23 14
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````       
##  141540
##  153179
##  156707
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##          
## 203538 40
## 160777 39
## 162601 38
## 163392 37
## 159937 36
## 
## Betweenness centrality (normalized):
##               
## 45558  0.06548
## 203538 0.02466
## 82603  0.02211
## 159937 0.01871
## 162601 0.01701
## 
## Closeness centrality (normalized):
##               
## 141540 1.56384
## 153179 1.54227
## 156707 1.49398
## 68906  1.44441
## 36687  1.43924
## 
## Eigenvector centrality (normalized):
##               
## 141540 1.00000
## 156707 0.97978
## 153179 0.96949
## 68906  0.96553
## 156946 0.95502
pdf(paste0(wdir, "fig/network-pearson-18s.pdf"), width = 30, height = 15)
p <- plot(props_pears,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.001,
          nodeColor = "cluster",
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 1.6,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          title1 = "Network on species level with Pearson correlations",
          showTitle = TRUE,
          cexTitle = 2.3)
legend(0.57, 1.1, cex = 2.2, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#009900","red"), 
       bty = "n", horiz = FALSE)
dev.off()
## png 
##   2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
## [1] 0.6214397
biom.18s.genus.top50 <- prune_taxa(names(sort(taxa_sums(biom.18s.genus), decreasing = TRUE)[1:50]), biom.18s.genus)
tax_table(biom.18s.genus.top50) <- tax_table(biom.18s.genus.top50) %>% as.data.frame() %>% mutate(Genus = str_remove(Genus, "_X*$")) %>% as.matrix()
net_pears_genus <- netConstruct(biom.18s.genus.top50,
                          taxRank = "Genus",
                          measure = "pearson",
                          zeroMethod = "multRepl",
                          normMethod = "clr",
                          sparsMethod = "threshold",
                          thresh = 0.3,
                          verbose = 3)
## Checking input arguments ... Done.
## 50 taxa and 27 samples remaining.
## 
## Zero treatment:
## Execute multRepl() ... Done.
## 
## Normalization:
## Execute clr(){SpiecEasi} ... Done.
## 
## Calculate 'pearson' associations ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "signed" transformation (values different from 0 and 1 will be edges in the network):
hist(net_pears_genus$assoMat1, 100, xlim = c(-1, 1), ylim = c(0, 400),
     xlab = "Estimated correlation", 
     main = "Estimated correlations after sparsification")

net_pears_genus_unsigned <- netConstruct(data = net_pears_genus$assoEst1,
                           dataType = "correlation",
                           sparsMethod = "threshold",
                           thresh = 0.3,
                           dissFunc = "unsigned",
                           verbose = 3)
## Checking input arguments ... Done.
## 
## Sparsify associations via 'threshold' ... Done.
#Adjacency values computed using the "unsigned" transformation:
hist(net_pears_genus_unsigned$adjaMat1, 100, ylim = c(0, 400),
     xlab = "Adjacency values", 
     main = "Adjacencies (with \"unsigned\" transformation)")

temp.nw <- net_pears_genus
props_pears_genus <- netAnalyze(temp.nw, 
                           centrLCC = TRUE,
                           clustMethod = "cluster_fast_greedy",
                           hubPar = "eigenvector",
                           weightDeg = FALSE, normDeg = FALSE)

#?summary.microNetProps
print(summary(props_pears_genus, numbNodes = 5L))
## 
## Component sizes
## ```````````````        
## size: 50
##    #:  1
## ______________________________
## Global network properties
## `````````````````````````
## Whole network:
##                                  
## Number of components      1.00000
## Clustering coefficient    0.72005
## Modularity                0.06084
## Positive edge percentage 46.69540
## Edge density              0.56816
## Natural connectivity      0.18639
## Vertex connectivity       6.00000
## Edge connectivity         6.00000
## Average dissimilarity*    0.80731
## Average path length**     1.03226
## -----
## *: Dissimilarity = 1 - edge weight
## **: Path length = Units with average dissimilarity
## 
## ______________________________
## Clusters
## - In the whole network
## - Algorithm: cluster_fast_greedy
## ```````````````````````````````` 
##                 
## name:  1 2  3  4
##    #: 12 7 17 14
## 
## ______________________________
## Hubs
## - In alphabetical/numerical order
## - Based on empirical quantiles of centralities
## ```````````````````````````````````````````````              
##  MAST-12A     
##  Strombidiidae
##  Teleaulax    
## 
## ______________________________
## Centrality measures
## - In decreasing order
## - Computed for the complete network
## ````````````````````````````````````
## Degree (unnormalized):
##                 
## Strombidiidae 42
## Teleaulax     40
## Strombidium   38
## Dinophyceae   37
## Ostreococcus  37
## 
## Betweenness centrality (normalized):
##                             
## Chlorococcum         0.03827
## Nannochloris         0.02806
## Strombidiidae        0.02381
## Lepidodinium         0.02296
## Dino-Group-I-Clade-4 0.02041
## 
## Closeness centrality (normalized):
##                             
## Strombidiidae        1.52595
## MAST-12A             1.51207
## Strombidium          1.49981
## Dino-Group-I-Clade-4 1.47805
## Ostreococcus         1.47145
## 
## Eigenvector centrality (normalized):
##                      
## Strombidiidae 1.00000
## MAST-12A      0.97247
## Teleaulax     0.95853
## Ostreococcus  0.94598
## Strombidium   0.94585
pdf(paste0(wdir, "fig/network-pearson-genus-18s.pdf"), width = 30, height = 15)
p <- plot(props_pears_genus,
          edgeInvisFilter = "threshold",
          edgeInvisPar = 0.5,
          nodeColor = "cluster",
          colorVec =   c("#009960", "#CC79A7", "#357090", "#804000"),
          nodeSize = "eigenvector",
          repulsion = 0.8,
          rmSingles = TRUE,
          labelScale = FALSE,
          cexLabels = 1.6,
          nodeSizeSpread = 3,
          cexNodes = 2,
          hubBorderCol = "darkgray",
          #title1 = "Network on genus level with Pearson correlations",
          showTitle = FALSE,
          #cexTitle = 2.3)
          posCol = "#0072B2",
          negCol = '#e41a1c')
legend(0.75, 1.1, cex = 2.2, title = "estimated association:",
       legend = c("+","-"), lty = 1, lwd = 3, col = c("#0072B2", '#e41a1c'), 
       bty = "n", horiz = FALSE)
dev.off()
## png 
##   2
#different transparency value is added to edges with an absolute weight below and above the `cut` value
print(p$q1$Arguments$cut)
## [1] 0.6300445
knitr::kable(as.matrix(sort(colSums(net_pears_genus$normCounts1), decreasing = TRUE)[1:20]))
Picochlorum 126.1917044
Tetraselmis 88.5317644
Nannochloris 83.3043019
Chlamydomonas 54.2694073
Rimostrombidium 36.6920230
Chlorodendrales 34.1162137
Aspidisca 24.2932602
Chlamydomonadales 19.7186379
Aplanochytrium 16.3574961
Chlamydodon 12.9062433
Paraphysomonas 11.4957146
Heterocapsa 3.5317635
Lepidodinium 0.9101461
Vannella -2.1349854
Euplotes -2.5459088
Bicoecea -2.9654446
Novel-Gran-2 -3.4083407
Philasterida -6.0462936
Caecitellus -6.6927218
Kryptoperidinium -7.9204505

Compare genera that were significant in differential abundance analysis

#lightVSocean: 8
my.genus.1 <- res_sig.aldex.lightVSocean %>% dplyr::select(Genus) %>% as.data.frame()
my.genus.1$lightVSocean <- 1
#lightVSdark: 15
my.genus.2 <- tax_table(biom.18s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect >= 1.5,] %>% row.names(),] %>% as.data.frame() %>% dplyr::select(Genus)
my.genus.2$lightVSdark <- 1
#light.time.trend: 11
my.genus.3 <- dge.4.u %>% dplyr::select(Genus) %>% as.data.frame
my.genus.3$light.time.trend <- 1
#merge
my.genus <- merge(x = my.genus.1, y = my.genus.2, by = "Genus", all = TRUE)
my.genus <- merge(x = my.genus, y = my.genus.3, by = "Genus", all = TRUE)

biom.18s.genus.1 <- biom.18s.genus
sample_data(biom.18s.genus.1) <- sample_data(biom.18s.genus) %>% as.data.frame() %>% as.matrix() %>% as.data.frame() %>% unite(culture_setup_time, c("Culture_setup", "Time_point"))
temp.genus <- merge_samples(biom.18s.genus.1, "culture_setup_time", fun=sum) %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt()
temp.abund <- data.frame()
for(i in 1:length(my.genus$Genus)) {
  temp.out <- temp.genus %>% dplyr::filter(Genus %in% my.genus$Genus[i]) %>% dplyr::select(Sample,Abundance,Genus)
  temp.abund <- rbind(temp.abund, temp.out)
}
my.genus.abund <- reshape2::acast(temp.abund, Genus~Sample, value.var = "Abundance")[,c(11,6,7,4,5,9,10,8,2,3,1)] %>% as.data.frame() %>% tibble::rownames_to_column(var = "Genus")
my.genus.light.18s <- merge(x = my.genus, y = my.genus.abund, by = "Genus", all = TRUE)
knitr::kable(as.matrix(my.genus.light.18s))
Genus lightVSocean lightVSdark light.time.trend ocean_0 light+clean_gas_5 light+clean_gas_7 light+clean_gas_11 light+clean_gas_14 light+used_gas_5 light+used_gas_7 light+used_gas_11 dark+clean_gas_5 dark+clean_gas_7 dark+clean_gas_11
Catena NA NA 1 0.000000e+00 0.0003379009 0.000000e+00 0.000000e+00 0.0000000000 0.0001782844 0.0000000000 0.000000e+00 0.0000000000 0.0000000000 5.657273e-05
Chlorella NA NA 1 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 1.149772e-03 0.0000000000 0.0000000000 7.947646e-05
Chlorellales_XX 1 NA NA 7.063648e-04 0.0046902999 5.041327e-04 4.516628e-04 0.0005916876 0.0046259276 0.0005658150 4.188787e-04 0.0000000000 0.0000000000 4.898143e-04
Chlorodendrales_XX 1 1 NA 1.392823e-02 0.0049646976 1.895699e-02 1.494706e-02 0.0051025601 0.0053314150 0.0211667699 4.223737e-02 0.0016048494 0.0007600987 4.753357e-03
Dunaliella 1 NA 1 0.000000e+00 0.0000000000 0.000000e+00 1.751160e-04 0.0000000000 0.0000000000 0.0000000000 5.038340e-05 0.0000000000 0.0000000000 0.000000e+00
Moneuplotes NA NA 1 0.000000e+00 0.0004341113 2.292957e-04 4.912807e-04 0.0000000000 0.0000000000 0.0000000000 1.708029e-04 0.0000000000 0.0009557372 8.791467e-05
Nannochloris 1 NA 1 2.828345e-02 0.0283297264 5.110312e-02 1.023234e-01 0.0426235538 0.0794856715 0.1836846907 2.750722e-01 0.0056415730 0.0058656923 1.059089e-01
Nannochloropsis 1 NA 1 0.000000e+00 0.0002302709 4.918582e-04 0.000000e+00 0.0011175651 0.0002165651 0.0000000000 4.838714e-05 0.0000000000 0.0000000000 4.529654e-04
Picochlorum 1 NA 1 1.217472e-01 0.2477550416 4.788249e-01 3.186011e-01 0.6424564080 0.4252928376 0.5368042925 3.380542e-01 0.0141721002 0.0304777948 3.292703e-01
Scenedesmus NA NA 1 6.565928e-05 0.0012813198 5.262908e-04 7.129171e-04 0.0000000000 0.0000000000 0.0000000000 3.488916e-04 0.0000000000 0.0000000000 3.452883e-04
Stichococcus NA NA 1 0.000000e+00 0.0000000000 3.358388e-05 7.903332e-05 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 0.0000000000 0.0000000000 0.000000e+00
Tetraselmis 1 1 1 2.983400e-02 0.0578165933 2.160364e-01 4.382103e-02 0.2877727088 0.0579401149 0.1589012787 1.520716e-01 0.0028844458 0.0107986014 4.702055e-02
Tisochrysis 1 1 NA 0.000000e+00 0.0000000000 4.610344e-04 7.707019e-05 0.0008392871 0.0000000000 0.0001981126 0.000000e+00 0.0000000000 0.0000000000 0.000000e+00
Uronema NA NA 1 8.885796e-05 0.0003462671 4.739865e-04 4.149361e-03 0.0065497112 0.0005253322 0.0000000000 0.000000e+00 0.0001357417 0.0004207827 6.370553e-05
#darkVSocean: 7
my.genus.4 <- res_sig.aldex.darkVSocean %>% dplyr::select(Genus) %>% as.data.frame()
my.genus.4$darkVSocean <- 1
#darkVSlight: 24
my.genus.5 <- tax_table(biom.18s.m.genus)[glm.eff.lightVSdark[glm.eff.lightVSdark$effect <= -1.5,] %>% row.names(),] %>% as.data.frame() %>% dplyr::select(Genus)
my.genus.5$darkVSlight <- 1
#dark.time.trend: 25
my.genus.6 <- dge.5.u %>% dplyr::select(Genus) %>% as.data.frame
my.genus.6$dark.time.trend <- 1
#merge
my.genus <- merge(x = my.genus.4, y = my.genus.5, by = "Genus", all = TRUE)
my.genus <- merge(x = my.genus, y = my.genus.6, by = "Genus", all = TRUE)

temp.abund <- data.frame()
for(i in 1:length(my.genus$Genus)) {
  temp.out <- temp.genus %>% dplyr::filter(Genus %in% my.genus$Genus[i]) %>% dplyr::select(Sample,Abundance,Genus)
  temp.abund <- rbind(temp.abund, temp.out)
}
my.genus.abund <- reshape2::acast(temp.abund, Genus~Sample, value.var = "Abundance")[,c(11,6,7,4,5,9,10,8,2,3,1)] %>% as.data.frame() %>% tibble::rownames_to_column(var = "Genus")
my.genus.dark.18s <- merge(x = my.genus, y = my.genus.abund, by = "Genus", all = TRUE)
knitr::kable(as.matrix(my.genus.dark.18s))
Genus darkVSocean darkVSlight dark.time.trend ocean_0 light+clean_gas_5 light+clean_gas_7 light+clean_gas_11 light+clean_gas_14 light+used_gas_5 light+used_gas_7 light+used_gas_11 dark+clean_gas_5 dark+clean_gas_7 dark+clean_gas_11
Aplanochytrium NA NA 1 3.383009e-03 0.0413115407 2.956411e-02 1.131662e-03 0.0004549487 0.0133246541 0.0062936916 2.219848e-03 1.997691e-03 0.0066310122 7.312849e-03
Aspidisca 1 1 NA 7.740166e-03 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0015227940 0.0000000000 0.000000e+00 2.018106e-01 0.2277381156 1.669369e-01
Chlamydomonadales_XX NA NA 1 2.390505e-03 0.0060157993 3.394457e-03 5.491708e-03 0.0000000000 0.0269921719 0.0458881429 1.608431e-01 7.408895e-04 0.0022946003 1.981292e-02
Chlamydomonas NA NA 1 1.207866e-02 0.0716771601 1.560514e-01 4.863801e-01 0.0001052672 0.0110994902 0.0141732120 1.174826e-02 3.676280e-03 0.0151075989 1.132940e-01
Chlorella NA NA 1 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 1.149772e-03 0.000000e+00 0.0000000000 7.947646e-05
Chlorococcum NA NA 1 1.411588e-04 0.0172536419 2.918188e-03 1.953060e-03 0.0000000000 0.0010855725 0.0000000000 0.000000e+00 0.000000e+00 0.0012145155 5.147376e-03
Cochliopodium NA NA 1 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0001271934 0.0000000000 0.000000e+00 7.197856e-05 0.0003639988 1.785318e-04
Dunaliella NA NA 1 0.000000e+00 0.0000000000 0.000000e+00 1.751160e-04 0.0000000000 0.0000000000 0.0000000000 5.038340e-05 0.000000e+00 0.0000000000 0.000000e+00
Mayorella NA NA 1 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 3.173417e-04
Ministeria NA 1 1 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 0.000000e+00 0.0001608826 2.972564e-03
Mychonastes NA NA 1 0.000000e+00 0.0000000000 1.026474e-03 0.000000e+00 0.0013384336 0.0000000000 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.000000e+00
Nannochloris NA NA 1 2.828345e-02 0.0283297264 5.110312e-02 1.023234e-01 0.0426235538 0.0794856715 0.1836846907 2.750722e-01 5.641573e-03 0.0058656923 1.059089e-01
Nannochloropsis NA NA 1 0.000000e+00 0.0002302709 4.918582e-04 0.000000e+00 0.0011175651 0.0002165651 0.0000000000 4.838714e-05 0.000000e+00 0.0000000000 4.529654e-04
Novel-Gran-1_X NA NA 1 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 7.508567e-05 0.0001195329 4.704850e-04
Novel-Gran-2_X 1 1 1 1.004975e-03 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 2.963091e-03 0.0397310904 4.516295e-02
Novel-Gran-6_X 1 1 1 3.152808e-04 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 4.098540e-04 0.0008177671 2.654930e-03
Ochromonadales_XX 1 1 NA 1.489642e-03 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 1.907350e-03 0.0525614854 1.675446e-03
Paraflabellula NA 1 NA 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 0.000000e+00 0.0006258056 5.711322e-05
Paramoeba 1 1 NA 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 0.000000e+00 0.0012742638 3.227980e-04
Paraphysomonas NA NA 1 1.877638e-02 0.0042827382 1.063238e-04 1.541404e-04 0.0000000000 0.0025345895 0.0000000000 2.003796e-04 3.085954e-03 0.0229186572 7.476929e-02
Pedospumella 1 1 NA 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 0.000000e+00 0.0010477237 0.000000e+00
Perkinsida_XXX NA NA 1 5.582823e-04 0.0048992100 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 1.201850e-04 0.0001778177 8.858099e-03
Picochlorum NA NA 1 1.217472e-01 0.2477550416 4.788249e-01 3.186011e-01 0.6424564080 0.4252928376 0.5368042925 3.380542e-01 1.417210e-02 0.0304777948 3.292703e-01
Prorocentrum NA 1 1 6.040541e-04 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 1.052576e-04 0.0000000000 2.182699e-04
Pseudovorticella NA NA 1 1.233169e-04 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 6.650289e-05 0.0000000000 1.061887e-04
Scenedesmus NA NA 1 6.565928e-05 0.0012813198 5.262908e-04 7.129171e-04 0.0000000000 0.0000000000 0.0000000000 3.488916e-04 0.000000e+00 0.0000000000 3.452883e-04
Sessilida_X NA 1 NA 0.000000e+00 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 3.143181e-04 0.0008128807 1.588315e-03
Spongiococcum NA NA 1 0.000000e+00 0.0000000000 0.000000e+00 1.363091e-04 0.0000000000 0.0009046367 0.0007896674 3.338583e-04 0.000000e+00 0.0000000000 0.000000e+00
Spumella 1 1 NA 1.752399e-03 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 6.101571e-03 0.2763408546 1.882008e-03
Stichococcus NA NA 1 0.000000e+00 0.0000000000 3.358388e-05 7.903332e-05 0.0000000000 0.0000000000 0.0000000000 0.000000e+00 0.000000e+00 0.0000000000 0.000000e+00
Tetraselmis NA NA 1 2.983400e-02 0.0578165933 2.160364e-01 4.382103e-02 0.2877727088 0.0579401149 0.1589012787 1.520716e-01 2.884446e-03 0.0107986014 4.702055e-02
Tisochrysis NA NA 1 0.000000e+00 0.0000000000 4.610344e-04 7.707019e-05 0.0008392871 0.0000000000 0.0001981126 0.000000e+00 0.000000e+00 0.0000000000 0.000000e+00

List of all genera with relative abundances per culture setup and time point

biom.18s.genus.1 <- biom.18s.genus
sample_data(biom.18s.genus.1) <- sample_data(biom.18s.genus) %>% as.data.frame() %>% as.matrix() %>% as.data.frame() %>% unite(culture_setup_time, c("Culture_setup", "Time_point"))
temp.genus <- merge_samples(biom.18s.genus.1, "culture_setup_time", fun=sum) %>% transform_sample_counts(function(x) {x / sum(x)}) %>% psmelt()
temp.abund.all <- temp.genus %>% dplyr::select(Sample,Abundance,Genus)
rel.abund.18s <- reshape2::acast(temp.abund.all, Genus~Sample, value.var = "Abundance")
write.table(as.matrix(rel.abund.18s), file = paste0(wdir, "tab-rel-abund-culture-time-18s.csv"), col.names = T, row.names = T, sep = ",", quote = F)
#save workspace
save.image(paste0(wdir, "/phyloseq-18s.Rdata"))

session information

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3;  LAPACK version 3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Copenhagen
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] splines   stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] NetCoMi_1.1.0               SpiecEasi_1.1.3            
##  [3] DirichletMultinomial_1.46.0 ALDEx2_1.36.0              
##  [5] latticeExtra_0.6-30         zCompositions_1.5.0-3      
##  [7] truncnorm_1.0-9             NADA_1.6-1.1               
##  [9] survival_3.6-4              MASS_7.3-60.2              
## [11] microbiome_1.26.0           factoextra_1.0.7           
## [13] FactoMineR_2.11             ggbiplot_0.6.2             
## [15] knitr_1.47                  ggpubr_0.6.0               
## [17] patchwork_1.2.0             gplots_3.1.3.1             
## [19] edgeR_4.2.0                 limma_3.60.2               
## [21] DESeq2_1.44.0               SummarizedExperiment_1.34.0
## [23] Biobase_2.64.0              MatrixGenerics_1.16.0      
## [25] matrixStats_1.3.0           GenomicRanges_1.56.0       
## [27] GenomeInfoDb_1.40.1         IRanges_2.38.0             
## [29] S4Vectors_0.42.0            BiocGenerics_0.50.0        
## [31] vegan_2.6-6.1               lattice_0.22-6             
## [33] permute_0.9-7               ggrepel_0.9.5              
## [35] RColorBrewer_1.1-3          psadd_0.1.3                
## [37] reshape2_1.4.4              phyloseq_1.48.0            
## [39] lubridate_1.9.3             forcats_1.0.0              
## [41] stringr_1.5.1               dplyr_1.1.4                
## [43] purrr_1.0.2                 readr_2.1.5                
## [45] tidyr_1.3.1                 tibble_3.2.1               
## [47] tidyverse_2.0.0             ggplot2_3.5.1              
## 
## loaded via a namespace (and not attached):
##   [1] nnet_7.3-19             DT_0.33                 Biostrings_2.72.1      
##   [4] TH.data_1.1-2           vctrs_0.6.5             digest_0.6.35          
##   [7] png_0.1-8               corpcor_1.6.10          shape_1.4.6.1          
##  [10] pcaPP_2.0-4             registry_0.5-1          corrplot_0.92          
##  [13] deldir_2.0-4            parallelly_1.37.1       SPRING_1.0.4           
##  [16] foreach_1.5.2           withr_3.0.0             psych_2.4.3            
##  [19] xfun_0.44               doRNG_1.8.6             memoise_2.0.1          
##  [22] emmeans_1.10.2          latentcor_2.0.1         zoo_1.8-12             
##  [25] gtools_3.9.5            pbapply_1.7-2           Formula_1.2-5          
##  [28] KEGGREST_1.44.0         scatterplot3d_0.3-44    httr_1.4.7             
##  [31] rstatix_0.7.2           globals_0.16.3          rhdf5filters_1.16.0    
##  [34] rhdf5_2.48.0            rstudioapi_0.16.0       UCSC.utils_1.0.0       
##  [37] generics_0.1.3          base64enc_0.1-3         zlibbioc_1.50.0        
##  [40] ca_0.71.1               doSNOW_1.0.20           RcppZiggurat_0.1.6     
##  [43] GenomeInfoDbData_1.2.12 quadprog_1.5-8          SparseArray_1.4.8      
##  [46] xtable_1.8-4            ade4_1.7-22             doParallel_1.0.17      
##  [49] evaluate_0.23           S4Arrays_1.4.1          Rfast_2.1.0            
##  [52] preprocessCore_1.66.0   hms_1.1.3               glmnet_4.1-8           
##  [55] pulsar_0.3.11           irlba_2.3.5.1           colorspace_2.1-0       
##  [58] magrittr_2.0.3          viridis_0.6.5           future.apply_1.11.2    
##  [61] cowplot_1.1.3           Hmisc_5.1-3             pillar_1.9.0           
##  [64] nlme_3.1-164            huge_1.3.5              iterators_1.0.14       
##  [67] caTools_1.18.2          compiler_4.4.1          stringi_1.8.4          
##  [70] biomformat_1.32.0       TSP_1.2-4               dendextend_1.17.1      
##  [73] plyr_1.8.9              crayon_1.5.2            abind_1.4-5            
##  [76] timeSeries_4032.109     sn_2.1.1                locfit_1.5-9.9         
##  [79] bit_4.0.5               rootSolve_1.8.2.4       mixedCCA_1.6.2         
##  [82] sandwich_3.1-0          fastcluster_1.2.6       codetools_0.2-20       
##  [85] multcomp_1.4-25         directlabels_2024.1.21  bslib_0.7.0            
##  [88] plotly_4.10.4           multtest_2.60.0         Rcpp_1.0.12            
##  [91] qgraph_1.9.8            magic_1.6-1             interp_1.1-6           
##  [94] leaps_3.1               blob_1.2.4              utf8_1.2.4             
##  [97] fBasics_4032.96         pbivnorm_0.6.0          listenv_0.9.1          
## [100] checkmate_2.3.1         Rdpack_2.6              ggsignif_0.6.4         
## [103] estimability_1.5.1      lavaan_0.6-17           Matrix_1.7-0           
## [106] statmod_1.5.0           tzdb_0.4.0              pkgconfig_2.0.3        
## [109] tools_4.4.1             cachem_1.1.0            rbibutils_2.2.16       
## [112] RSQLite_2.3.7           viridisLite_0.4.2       DBI_1.2.3              
## [115] numDeriv_2016.8-1.1     impute_1.78.0           fastmap_1.2.0          
## [118] rmarkdown_2.27          scales_1.3.0            grid_4.4.1             
## [121] fMultivar_4031.84       broom_1.0.6             sass_0.4.9             
## [124] coda_0.19-4.1           carData_3.0-5           rpart_4.1.23           
## [127] snow_0.4-4              farver_2.1.2            mgcv_1.9-1             
## [130] yaml_2.3.8              VGAM_1.1-11             spatial_7.3-17         
## [133] foreign_0.8-86          cli_3.6.2               webshot_0.5.5          
## [136] lifecycle_1.0.4         mvtnorm_1.2-5           backports_1.5.0        
## [139] BiocParallel_1.38.0     timechange_0.3.0        gtable_0.3.5           
## [142] parallel_4.4.1          ape_5.8                 jsonlite_1.8.8         
## [145] seriation_1.5.5         bitops_1.0-7            multcompView_0.1-10    
## [148] bit64_4.0.5             assertthat_0.2.1        Rtsne_0.17             
## [151] glasso_1.11             doFuture_1.0.1          heatmaply_1.5.0        
## [154] RcppParallel_5.1.7      jquerylib_0.1.4         highr_0.11             
## [157] timeDate_4032.109       orca_1.1-2              lazyeval_0.2.2         
## [160] dynamicTreeCut_1.63-1   htmltools_0.5.8.1       GO.db_3.19.1           
## [163] glue_1.7.0              XVector_0.44.0          microbenchmark_1.4.10  
## [166] mnormt_2.1.1            jpeg_0.1-10             gridExtra_2.3          
## [169] flashClust_1.01-2       igraph_2.0.3            R6_2.5.1               
## [172] fdrtool_1.2.17          labeling_0.4.3          cluster_2.1.6          
## [175] rngtools_1.5.2          Rhdf5lib_1.26.0         DelayedArray_0.30.1    
## [178] tidyselect_1.2.1        plotrix_3.8-4           WGCNA_1.72-5           
## [181] htmlTable_2.4.2         car_3.1-2               AnnotationDbi_1.66.0   
## [184] future_1.33.2           filematrix_1.3          munsell_0.5.1          
## [187] KernSmooth_2.23-24      geometry_0.4.7          data.table_1.15.4      
## [190] htmlwidgets_1.6.4       rlang_1.1.3             fansi_1.0.6